diff --git a/pg_lake_engine/include/pg_lake/util/catalog_type.h b/pg_lake_engine/include/pg_lake/util/catalog_type.h index 2db12898..50e7a37c 100644 --- a/pg_lake_engine/include/pg_lake/util/catalog_type.h +++ b/pg_lake_engine/include/pg_lake/util/catalog_type.h @@ -17,13 +17,34 @@ #pragma once +/* FDW name for iceberg_catalog servers */ +#define ICEBERG_CATALOG_FDW_NAME "iceberg_catalog" + /* * The allowed values for IcebergDefaultCatalog, case insensitive. +* +* These are the user-facing short names used as the catalog= option value +* on CREATE TABLE ... USING iceberg. Internally they map to the +* prefixed built-in server names below; users never type the prefixed +* names directly. */ #define POSTGRES_CATALOG_NAME "postgres" #define OBJECT_STORE_CATALOG_NAME "object_store" #define REST_CATALOG_NAME "rest" +/* + * Built-in iceberg_catalog server names. Pre-created by the extension + * upgrade script and exist purely as anchors for pg_depend edges and the + * uniform server-lookup path. All ALTER/DROP/RENAME on these names is + * blocked (configuration for the built-in catalogs lives in GUCs). + * + * The prefix keeps them clear of names users are likely to have already + * used (notably the very common "CREATE SERVER postgres FDW postgres_fdw"). + */ +#define PG_LAKE_POSTGRES_CATALOG_SERVER_NAME "pg_lake_postgres_catalog" +#define PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME "pg_lake_object_store_catalog" +#define PG_LAKE_REST_CATALOG_SERVER_NAME "pg_lake_rest_catalog" + typedef enum IcebergCatalogType { NONE_CATALOG = 0, @@ -61,3 +82,7 @@ extern PGDLLEXPORT IcebergCatalogType GetIcebergCatalogType(Oid relationId); extern PGDLLEXPORT bool HasRestCatalogTableOption(List *options); extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options); extern PGDLLEXPORT bool HasReadOnlyOption(List *options); +extern PGDLLEXPORT bool IsCatalogOwnedByExtension(const char *catalog); +extern PGDLLEXPORT bool IsRestCatalog(const char *catalog); +extern PGDLLEXPORT const char *ResolveCatalogServerName(const char *catalog); +extern PGDLLEXPORT bool IsBuiltinCatalogServerName(const char *serverName); diff --git a/pg_lake_engine/src/utils/catalog_type.c b/pg_lake_engine/src/utils/catalog_type.c index cbf678ac..905aa238 100644 --- a/pg_lake_engine/src/utils/catalog_type.c +++ b/pg_lake_engine/src/utils/catalog_type.c @@ -68,15 +68,16 @@ GetIcebergCatalogType(Oid relationId) /* - * HasRestCatalogTableOption returns true if the options contain - * catalog='rest'. + * HasRestCatalogTableOption returns true if the catalog option indicates a + * REST catalog: either the literal value 'rest' or the name of an + * iceberg_catalog foreign server with TYPE 'rest'. */ bool HasRestCatalogTableOption(List *options) { char *catalog = GetStringOption(options, "catalog", false); - return catalog ? pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0 : false; + return IsRestCatalog(catalog); } @@ -89,7 +90,7 @@ HasObjectStoreCatalogTableOption(List *options) { char *catalog = GetStringOption(options, "catalog", false); - return catalog ? pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(catalog)) == 0 : false; + return catalog ? pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 : false; } @@ -104,3 +105,119 @@ HasReadOnlyOption(List *options) return readOnly ? pg_strncasecmp(readOnly, "true", strlen("true")) == 0 : false; } + + +/* + * IsCatalogOwnedByExtension returns true if the catalog name is one of + * the reserved built-in names: 'rest', 'object_store', or 'postgres'. + * Comparison is case-insensitive. + */ +bool +IsCatalogOwnedByExtension(const char *catalog) +{ + return pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 || + pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0; +} + + +/* + * IsRestCatalog returns true if the catalog name identifies a REST catalog. + * This includes the built-in 'rest' literal and any user-created + * iceberg_catalog server whose TYPE is 'rest'. + * + * The internal built-in server names (e.g. "pg_lake_rest_catalog") are + * deliberately rejected: they are implementation details and must not be + * usable as catalog= option values on CREATE TABLE. Users always type + * the short name "rest", which is mapped to the long server name only + * inside the resolution layer. + */ +bool +IsRestCatalog(const char *catalog) +{ + if (catalog == NULL) + return false; + + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) + return true; + + if (IsBuiltinCatalogServerName(catalog)) + return false; + + /* Try to look up a server with this name */ + bool missingOK = true; + ForeignServer *server = GetForeignServerByName(catalog, missingOK); + + if (server == NULL) + return false; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + + /* + * Any iceberg_catalog server reaching this point is user-created, and + * ValidateIcebergCatalogServerDDL forces all user-created iceberg_catalog + * servers to TYPE 'rest'. + */ + Assert(pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0); + return true; +} + + +/* + * ResolveCatalogServerName maps a user-facing catalog identifier to the + * actual pg_foreign_server.srvname. + * + * For the three reserved short names ('postgres', 'object_store', 'rest') + * the result is the corresponding pre-created built-in server name. + * Any other input is returned unchanged (user-created server names match + * their catalog= option value verbatim). + * + * The returned pointer is either a string literal or the input pointer; + * callers must not free it. + */ +const char * +ResolveCatalogServerName(const char *catalog) +{ + if (catalog == NULL) + return NULL; + + if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0) + return PG_LAKE_REST_CATALOG_SERVER_NAME; + if (pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0) + return PG_LAKE_POSTGRES_CATALOG_SERVER_NAME; + if (pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0) + return PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME; + + return catalog; +} + + +/* + * IsBuiltinCatalogServerName returns true if the given name matches one + * of the three pre-created built-in iceberg_catalog servers. + * + * Comparison is case-insensitive: both PostgreSQL-parsed identifiers + * (already downcased by the parser unless quoted) and free-form string + * literals supplied as catalog= option values flow through this helper, + * and we want to reject typos like 'PG_LAKE_REST_CATALOG' just as + * firmly as the canonical form. + * + * This is the long-name counterpart to IsCatalogOwnedByExtension, which + * operates on the user-facing short names. Used by the DDL protection + * hook to lock down ALTER/RENAME/OWNER on the extension's structural + * anchors and by create_table.c to reject the long names as catalog= + * option values. + */ +bool +IsBuiltinCatalogServerName(const char *serverName) +{ + if (serverName == NULL) + return false; + + return pg_strcasecmp(serverName, PG_LAKE_REST_CATALOG_SERVER_NAME) == 0 || + pg_strcasecmp(serverName, PG_LAKE_POSTGRES_CATALOG_SERVER_NAME) == 0 || + pg_strcasecmp(serverName, PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME) == 0; +} diff --git a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h index a6634062..36ba0357 100644 --- a/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h +++ b/pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h @@ -18,12 +18,13 @@ #pragma once #include "postgres.h" +#include "pg_lake/ddl/utility_hook.h" #include "pg_lake/http/http_client.h" #include "pg_lake/util/rel_utils.h" #include "pg_lake/parquet/field.h" #include "pg_lake/iceberg/api/snapshot.h" -#define REST_CATALOG_AUTH_TYPE_DEFAULT (0) +#define REST_CATALOG_AUTH_TYPE_OAUTH2 (0) #define REST_CATALOG_AUTH_TYPE_HORIZON (1) extern PGDLLEXPORT char *RestCatalogHost; @@ -34,6 +35,37 @@ extern char *RestCatalogScope; extern int RestCatalogAuthType; extern bool RestCatalogEnableVendedCredentials; +/* + * Resolved REST catalog connection options. All REST catalogs -- + * built-in ('rest') and user-created (CREATE SERVER ... FOREIGN DATA + * WRAPPER iceberg_catalog) -- are backed by a real pg_foreign_server + * row; ApplyGUCDefaults populates the defaults, ApplyServerOptionOverrides + * layers on any per-server options. + * + * The canonical identity of a catalog is `serverOid` (the OID of the + * iceberg_catalog server row). Use it for in-memory equality, token + * cache keys, and syscache-driven invalidation. `catalog` stores the + * user-visible short name (e.g. 'rest', 'my_polaris') purely for error + * messages. + */ +typedef struct RestCatalogOptions +{ + Oid serverOid; /* iceberg_catalog server OID; canonical + * identity, never InvalidOid for resolved + * opts */ + char *catalog; /* short user-facing name; used in error + * messages, never for equality */ + char *host; + char *oauthHostPath; + char *clientId; + char *clientSecret; + char *scope; + char *locationPrefix; + char *catalogName; /* REST API catalog prefix; defaults to dbname */ + int authType; + bool enableVendedCredentials; +} RestCatalogOptions; + #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" #define REST_CATALOG_NAMESPACE_NAME "%s/api/catalog/v1/%s/namespaces/%s" @@ -77,24 +109,32 @@ typedef struct RestCatalogRequest #define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens" #define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s" -extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceName); +/* Catalog options resolution */ +extern PGDLLEXPORT RestCatalogOptions * ResolveRestCatalogOptions(const char *catalog); +extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId); +extern PGDLLEXPORT RestCatalogOptions * CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src); + +extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId); extern PGDLLEXPORT char *FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSchema * dataFileSchema, List *partitionSpecs); -extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceName); +extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); extern PGDLLEXPORT char *GetRestCatalogName(Oid relationId); extern PGDLLEXPORT char *GetRestCatalogNamespace(Oid relationId); extern PGDLLEXPORT char *GetRestCatalogTableName(Oid relationId); extern PGDLLEXPORT bool IsReadOnlyRestCatalogIcebergTable(Oid relationId); -extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *namespaceName, +extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCatalogName, const char *namespaceName, const char *relationName); extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId); extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level); -extern PGDLLEXPORT List *PostHeadersWithAuth(void); -extern PGDLLEXPORT List *DeleteHeadersWithAuth(void); -extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers); +extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogOptions * opts); +extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogOptions * opts); +extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(RestCatalogOptions * opts, HttpMethod method, const char *url, const char *body, List *headers); extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId); extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema); extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid relationId, int32_t schemaId); extern PGDLLEXPORT RestCatalogRequest * GetAddPartitionCatalogRequest(Oid relationId, List *partitionSpec); extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(Oid relationId, int specId); extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId); + +/* ProcessUtility handler for iceberg_catalog server DDL validation */ +extern PGDLLEXPORT bool ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, void *arg); diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql index 11b0f619..286cf23a 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql @@ -14,4 +14,3 @@ CREATE OR REPLACE VIEW pg_catalog.iceberg_tables AS SELECT catalog_name, table_namespace, table_name, metadata_location, previous_metadata_location FROM lake_iceberg.tables WHERE metadata_location IS NOT NULL; - diff --git a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql index 27fca375..6fb0909c 100644 --- a/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql +++ b/pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql @@ -1 +1,82 @@ -- Upgrade script for pg_lake_iceberg from 3.3 to 3.4 + +/* + * iceberg_catalog foreign data wrapper: allows defining named catalog + * configurations via CREATE SERVER so that users are not limited to a + * single global REST catalog configured through GUC settings. + * + * Example: + * CREATE SERVER my_polaris TYPE 'rest' + * FOREIGN DATA WRAPPER iceberg_catalog + * OPTIONS (rest_endpoint 'http://polaris:8181', + * rest_auth_type 'default', + * client_id '...', + * client_secret '...'); + * + * CREATE TABLE t (a int) USING iceberg WITH (catalog = 'my_polaris'); + */ +CREATE FUNCTION lake_iceberg.iceberg_catalog_validator(text[], oid) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT; + +CREATE FOREIGN DATA WRAPPER iceberg_catalog + NO HANDLER + VALIDATOR lake_iceberg.iceberg_catalog_validator; + +GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write; + +/* + * Built-in catalog servers. + * + * These three servers are pre-created as structural anchors for the + * pg_depend dependency edges that iceberg tables record against their + * catalog server. They are extension-owned and immutable: ALTER, DROP, + * RENAME, and OWNER changes on them are all blocked. Configuration + * for the built-in catalogs lives in GUCs, not in server options. + * + * Users keep typing the short names ('postgres', 'object_store', 'rest') + * as the catalog= option value on CREATE TABLE; ResolveCatalogServerName + * maps short -> long at server lookup time. The long names are prefixed + * so they cannot collide with names users may already have in their + * databases (e.g. a postgres_fdw server literally named 'postgres'). + * + * Pre-flight: error early with a clear hint if any of the long names is + * already in use. This prevents a confusing "server already exists" + * mid-upgrade. + */ +DO $do$ +DECLARE + conflicting text; +BEGIN + SELECT srvname INTO conflicting + FROM pg_foreign_server + WHERE srvname IN ('pg_lake_postgres_catalog', + 'pg_lake_object_store_catalog', + 'pg_lake_rest_catalog') + LIMIT 1; + + IF conflicting IS NOT NULL THEN + RAISE EXCEPTION + 'pg_lake_iceberg upgrade conflicts with existing foreign server %', conflicting + USING HINT = 'Drop or rename the server and re-run ALTER EXTENSION pg_lake_iceberg UPDATE. ' + 'pg_lake_iceberg reserves the names pg_lake_postgres_catalog, ' + 'pg_lake_object_store_catalog, and pg_lake_rest_catalog for internal use.'; + END IF; +END $do$; + +CREATE SERVER pg_lake_postgres_catalog + TYPE 'postgres' + FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER pg_lake_object_store_catalog + TYPE 'object_store' + FOREIGN DATA WRAPPER iceberg_catalog; + +CREATE SERVER pg_lake_rest_catalog + TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog; + +GRANT USAGE ON FOREIGN SERVER pg_lake_postgres_catalog TO lake_write; +GRANT USAGE ON FOREIGN SERVER pg_lake_object_store_catalog TO lake_write; +GRANT USAGE ON FOREIGN SERVER pg_lake_rest_catalog TO lake_write; diff --git a/pg_lake_iceberg/src/init.c b/pg_lake_iceberg/src/init.c index 63972e7b..ac4b4067 100644 --- a/pg_lake_iceberg/src/init.c +++ b/pg_lake_iceberg/src/init.c @@ -27,6 +27,7 @@ #include "pg_lake/avro/avro_reader.h" #include "pg_lake/avro/avro_writer.h" #include "pg_lake/copy/copy_format.h" +#include "pg_lake/ddl/utility_hook.h" #include "pg_lake/iceberg/api.h" #include "pg_lake/pgduck/numeric.h" #include "pg_lake/iceberg/catalog.h" @@ -35,6 +36,8 @@ #include "pg_lake/iceberg/operations/vacuum.h" #include "pg_lake/object_store_catalog/object_store_catalog.h" #include "pg_lake/rest_catalog/rest_catalog.h" +#include "pg_lake/util/catalog_type.h" +#include "access/xact.h" #define GUC_STANDARD 0 @@ -59,7 +62,8 @@ void _PG_init(void); /* pg_lake_iceberg.rest_catalog_auth_type */ static const struct config_enum_entry RestCatalogAuthTypeOptions[] = { - {"default", REST_CATALOG_AUTH_TYPE_DEFAULT, false}, + {"oauth2", REST_CATALOG_AUTH_TYPE_OAUTH2, false}, + {"default", REST_CATALOG_AUTH_TYPE_OAUTH2, false}, {"horizon", REST_CATALOG_AUTH_TYPE_HORIZON, false}, {NULL, 0, false}, }; @@ -256,7 +260,7 @@ _PG_init(void) gettext_noop("Determines the format for the initial OAuth token requests."), NULL, &RestCatalogAuthType, - REST_CATALOG_AUTH_TYPE_DEFAULT, + REST_CATALOG_AUTH_TYPE_OAUTH2, RestCatalogAuthTypeOptions, PGC_SUSET, GUC_SUPERUSER_ONLY | GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE, @@ -329,6 +333,8 @@ _PG_init(void) NULL, NULL, NULL); AvroInit(); + + RegisterUtilityStatementHandler(ValidateIcebergCatalogServerDDL, NULL); } @@ -366,13 +372,27 @@ IcebergDefaultCatalogCheckHook(char **newvalue, void **extra, GucSource source) { char *newCatalog = *newvalue; - if (pg_strncasecmp(newCatalog, POSTGRES_CATALOG_NAME, strlen(newCatalog)) == 0 || - pg_strncasecmp(newCatalog, REST_CATALOG_NAME, strlen(newCatalog)) == 0 || - pg_strncasecmp(newCatalog, OBJECT_STORE_CATALOG_NAME, strlen(newCatalog)) == 0) + if (pg_strcasecmp(newCatalog, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(newCatalog, REST_CATALOG_NAME) == 0 || + pg_strcasecmp(newCatalog, OBJECT_STORE_CATALOG_NAME) == 0) + return true; + + /* + * Outside a transaction we cannot do catalog lookups to verify that the + * name refers to a valid iceberg_catalog server. Accept the value on + * faith; an invalid name will error at first use. This mirrors how + * PostgreSQL handles check_default_tablespace (see + * src/backend/commands/tablespace.c). + */ + if (!IsTransactionState()) + return true; + + if (IsRestCatalog(newCatalog)) return true; GUC_check_errdetail("pg_lake_iceberg: allowed iceberg catalog options are '" POSTGRES_CATALOG_NAME "', " - " '" REST_CATALOG_NAME "' and '" OBJECT_STORE_CATALOG_NAME "'"); + "'" REST_CATALOG_NAME "', '" OBJECT_STORE_CATALOG_NAME + "', or the name of a user-created iceberg_catalog server with TYPE 'rest'"); return false; } diff --git a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c index 8cf2b2ae..58a2a3e5 100644 --- a/pg_lake_iceberg/src/rest_catalog/rest_catalog.c +++ b/pg_lake_iceberg/src/rest_catalog/rest_catalog.c @@ -20,14 +20,26 @@ #include "postgres.h" #include "miscadmin.h" +#include "access/genam.h" +#include "access/reloptions.h" +#include "access/table.h" +#include "catalog/pg_class.h" +#include "catalog/pg_depend.h" +#include "catalog/pg_foreign_server.h" #include "common/base64.h" #include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "commands/extension.h" #include "foreign/foreign.h" +#include "fmgr.h" #include "lib/stringinfo.h" #include "utils/builtins.h" +#include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/jsonb.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/syscache.h" #include "utils/timestamp.h" #include "pg_extension_base/base_workers.h" @@ -40,8 +52,10 @@ #include "pg_lake/object_store_catalog/object_store_catalog.h" #include "pg_lake/parsetree/options.h" #include "pg_lake/rest_catalog/rest_catalog.h" +#include "pg_lake/util/catalog_type.h" #include "pg_lake/util/url_encode.h" #include "pg_lake/util/rel_utils.h" +#include "pg_lake/util/string_utils.h" /* determined by GUC */ @@ -50,21 +64,31 @@ char *RestCatalogOauthHostPath = ""; char *RestCatalogClientId = NULL; char *RestCatalogClientSecret = NULL; char *RestCatalogScope = "PRINCIPAL_ROLE:ALL"; -int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_DEFAULT; +int RestCatalogAuthType = REST_CATALOG_AUTH_TYPE_OAUTH2; bool RestCatalogEnableVendedCredentials = true; /* -* Should always be accessed via GetRestCatalogAccessToken() -*/ -static char *RestCatalogAccessToken = NULL; -static TimestampTz RestCatalogAccessTokenExpiry = 0; + * Per-rest-catalog token cache, keyed by the iceberg_catalog server OID. + * Should always be accessed via GetRestCatalogAccessToken(). + */ +typedef struct RestCatalogTokenCacheEntry +{ + Oid serverOid; /* hash key */ + char *accessToken; + TimestampTz accessTokenExpiry; +} RestCatalogTokenCacheEntry; + +static HTAB *RestCatalogTokenCache = NULL; +static MemoryContext RestTokenCacheCtx = NULL; + +/* end of per-catalog token cache variables */ -static char *GetRestCatalogAccessToken(bool forceRefreshToken); -static void FetchRestCatalogAccessToken(char **accessToken, int *expiresIn); -static void CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName); +static char *GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken); +static void FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn); +static void CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName); static char *EncodeBasicAuth(const char *clientId, const char *clientSecret); static char *JsonbGetStringByPath(const char *jsonb_text, int nkeys,...); -static List *GetHeadersWithAuth(void); +static List *GetHeadersWithAuth(RestCatalogOptions * opts); static char *AppendIcebergPartitionSpecForRestCatalog(List *partitionSpecs); static void UpdateAuthorizationHeader(List *headers, const char *token); @@ -79,6 +103,666 @@ typedef enum RestCatalogRequestRetryAction REST_CATALOG_RETRY_REFRESH_AUTH /* 419 Token Expired */ } RestCatalogRequestRetryAction; +PG_FUNCTION_INFO_V1(iceberg_catalog_validator); + + +/* + * Descriptor for a single iceberg_catalog server option. This is the + * single source of truth: validation, the user-facing hint, and the + * option-to-struct applier all derive from this table. + */ +typedef enum IcebergCatalogOptionType +{ + CATALOG_OPT_STRING, + CATALOG_OPT_BOOL, + CATALOG_OPT_AUTH_TYPE, + CATALOG_OPT_LOCATION_PREFIX +} IcebergCatalogOptionType; + +/* Validation flags checked at CREATE/ALTER SERVER time. */ +#define CATALOG_OPT_NONEMPTY 0x01 /* reject empty string */ +#define CATALOG_OPT_HAS_SCHEME 0x02 /* must contain "://" */ + +typedef struct IcebergCatalogOptionDesc +{ + const char *name; + IcebergCatalogOptionType type; + size_t offset; /* offsetof into RestCatalogOptions */ + int flags; /* CATALOG_OPT_NONEMPTY | + * CATALOG_OPT_HAS_SCHEME */ +} IcebergCatalogOptionDesc; + +static const IcebergCatalogOptionDesc iceberg_catalog_option_descs[] = { + {"rest_endpoint", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, host), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"rest_auth_type", CATALOG_OPT_AUTH_TYPE, offsetof(RestCatalogOptions, authType), 0}, + {"oauth_endpoint", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, oauthHostPath), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"scope", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, scope), + CATALOG_OPT_NONEMPTY}, + {"enable_vended_credentials", CATALOG_OPT_BOOL, offsetof(RestCatalogOptions, enableVendedCredentials), 0}, + {"location_prefix", CATALOG_OPT_LOCATION_PREFIX, offsetof(RestCatalogOptions, locationPrefix), + CATALOG_OPT_NONEMPTY | CATALOG_OPT_HAS_SCHEME}, + {"catalog_name", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, catalogName), + CATALOG_OPT_NONEMPTY}, + {"client_id", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, clientId), + CATALOG_OPT_NONEMPTY}, + {"client_secret", CATALOG_OPT_STRING, offsetof(RestCatalogOptions, clientSecret), + CATALOG_OPT_NONEMPTY}, +}; + +#define NUM_CATALOG_OPTIONS lengthof(iceberg_catalog_option_descs) + + +/* + * Look up a descriptor by option name, or return NULL if not found. + */ +static const IcebergCatalogOptionDesc * +FindCatalogOptionDesc(const char *name) +{ + for (int i = 0; i < NUM_CATALOG_OPTIONS; i++) + { + if (pg_strcasecmp(name, iceberg_catalog_option_descs[i].name) == 0) + return &iceberg_catalog_option_descs[i]; + } + return NULL; +} + + +/* + * Build the "Valid options are: ?" hint string. Cached after first call. + * + * The cache must outlive any individual transaction: this helper is only + * called on validator error paths, and ereport(ERROR) immediately aborts + * the current transaction, freeing whatever short-lived context the + * validator was running under. Allocate the buffer in TopMemoryContext + * so the static pointer remains valid for the lifetime of the backend. + */ +static const char * +GetValidCatalogOptionsHint(void) +{ + static char *hint = NULL; + + if (hint == NULL) + { + MemoryContext oldcxt; + StringInfoData buf; + + oldcxt = MemoryContextSwitchTo(TopMemoryContext); + initStringInfo(&buf); + appendStringInfoString(&buf, "Valid options are: "); + for (int i = 0; i < NUM_CATALOG_OPTIONS; i++) + { + if (i > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, iceberg_catalog_option_descs[i].name); + } + appendStringInfoChar(&buf, '.'); + hint = buf.data; + MemoryContextSwitchTo(oldcxt); + } + + return hint; +} + + +/* + * Validate a single option value. Called from iceberg_catalog_validator + * after the name has already been accepted. Type-specific checks run + * first, then flag-based checks (non-empty, scheme present). + */ +static void +ValidateCatalogOptionValue(const IcebergCatalogOptionDesc * desc, DefElem *def) +{ + switch (desc->type) + { + case CATALOG_OPT_AUTH_TYPE: + { + char *authType = defGetString(def); + + if (pg_strcasecmp(authType, "oauth2") != 0 && + pg_strcasecmp(authType, "default") != 0 && + pg_strcasecmp(authType, "horizon") != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid rest_auth_type option: \"%s\"", authType), + errhint("Valid values are \"oauth2\" and \"horizon\"."))); + return; + } + case CATALOG_OPT_BOOL: + (void) defGetBoolean(def); + return; + default: + break; + } + + if (desc->flags == 0) + return; + + char *value = defGetString(def); + + if ((desc->flags & CATALOG_OPT_NONEMPTY) && value[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for \"%s\": must not be empty", + desc->name))); + + if ((desc->flags & CATALOG_OPT_HAS_SCHEME) && strstr(value, "://") == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid value for \"%s\": \"%s\"", + desc->name, value), + errhint("Include a URI scheme (e.g. \"https://...\")."))); +} + + +/* + * Apply a single server option onto the RestCatalogOptions struct. + * Called from ApplyServerOptionOverrides for each DefElem on the server. + */ +static void +ApplyCatalogOptionValue(RestCatalogOptions * opts, + const IcebergCatalogOptionDesc * desc, DefElem *def) +{ + switch (desc->type) + { + case CATALOG_OPT_STRING: + *(char **) ((char *) opts + desc->offset) = pstrdup(defGetString(def)); + break; + case CATALOG_OPT_BOOL: + *(bool *) ((char *) opts + desc->offset) = defGetBoolean(def); + break; + case CATALOG_OPT_AUTH_TYPE: + { + char *authType = defGetString(def); + + *(int *) ((char *) opts + desc->offset) = + (pg_strcasecmp(authType, "horizon") == 0) + ? REST_CATALOG_AUTH_TYPE_HORIZON + : REST_CATALOG_AUTH_TYPE_OAUTH2; + break; + } + case CATALOG_OPT_LOCATION_PREFIX: + { + bool inPlace = false; + + *(char **) ((char *) opts + desc->offset) = + pstrdup(StripTrailingSlash(defGetString(def), inPlace)); + break; + } + } +} + + +/* + * iceberg_catalog_validator validates options for the iceberg_catalog FDW. + * Only server-level options are supported. + */ +Datum +iceberg_catalog_validator(PG_FUNCTION_ARGS) +{ + List *options_list = untransformRelOptions(PG_GETARG_DATUM(0)); + Oid catalogRelId = PG_GETARG_OID(1); + ListCell *cell; + + /* + * PostgreSQL calls the validator for CREATE FOREIGN DATA WRAPPER itself + * (with ForeignDataWrapperRelationId), not just for CREATE SERVER. Allow + * empty option lists for non-server contexts so extension creation + * succeeds, but still reject if someone passes options where they don't + * belong. + */ + if (catalogRelId != ForeignServerRelationId) + { + if (list_length(options_list) > 0) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("iceberg_catalog options are only valid for SERVER objects"))); + PG_RETURN_VOID(); + } + + foreach(cell, options_list) + { + DefElem *def = (DefElem *) lfirst(cell); + const IcebergCatalogOptionDesc *desc = FindCatalogOptionDesc(def->defname); + + if (desc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_OPTION_NAME), + errmsg("invalid option \"%s\" for iceberg_catalog server", + def->defname), + errhint("%s", GetValidCatalogOptionsHint()))); + + ValidateCatalogOptionValue(desc, def); + } + + PG_RETURN_VOID(); +} + + +/* + * ServerHasDependentRestIcebergTable returns true if the given server + * has at least one dependent REST-backed iceberg table (read-only or + * writable) recorded in pg_depend. Used to block ALTER SERVER changes + * that would silently break existing tables, since both writable and + * read-only REST tables make REST API calls at runtime against the + * server's rest_endpoint. + */ +static bool +ServerHasDependentRestIcebergTable(Oid serverOid) +{ + Relation depRel; + ScanKeyData key[2]; + SysScanDesc scan; + HeapTuple tup; + bool found = false; + + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ForeignServerRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(serverOid)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 2, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend depForm = (Form_pg_depend) GETSTRUCT(tup); + + if (depForm->classid != RelationRelationId) + continue; + + IcebergCatalogType type = GetIcebergCatalogType(depForm->objid); + + if (type == REST_CATALOG_READ_WRITE || type == REST_CATALOG_READ_ONLY) + { + found = true; + break; + } + } + + systable_endscan(scan); + table_close(depRel, AccessShareLock); + + return found; +} + + +/* + * RejectIfBuiltinCatalogServerName errors out if `name` is one of the + * three internal pg_lake_iceberg catalog server names. Shared by the + * CREATE SERVER and ALTER SERVER ... RENAME TO branches, where the + * concern is "users must not be able to create or end up with a server + * carrying one of these reserved names". + */ +static void +RejectIfBuiltinCatalogServerName(const char *name) +{ + if (!IsBuiltinCatalogServerName(name)) + return; + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for an internal " + "pg_lake_iceberg catalog server", name), + errhint("Choose a different server name."))); +} + + +/* + * RejectIfModifyingBuiltinCatalogServer errors out if `name` refers to + * one of the three built-in pg_lake_iceberg catalog servers. The + * `operation` verb fills in the user-facing error ("cannot %s the + * built-in...") and should read naturally in that template (e.g. + * "alter", "change owner of"). Shared by the ALTER SERVER OPTIONS + * and ALTER SERVER ... OWNER TO branches. + */ +static void +RejectIfModifyingBuiltinCatalogServer(const char *name, const char *operation) +{ + if (!IsBuiltinCatalogServerName(name)) + return; + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot %s the built-in pg_lake_iceberg " + "catalog server \"%s\"", operation, name), + errhint("Configure built-in catalogs via " + "pg_lake_iceberg GUCs, or create a " + "user-defined iceberg_catalog server."))); +} + + +/* + * ValidateIcebergCatalogServerDDL validates DDL on iceberg_catalog servers. + * + * Two layers of protection: + * + * 1. Short reserved names ('postgres', 'object_store', 'rest') -- these + * are the user-facing catalog= values. We block CREATE SERVER and + * RENAME TO these names so users can't shadow the built-in catalogs. + * + * 2. Built-in long server names ('pg_lake_postgres_catalog', etc.) -- + * these are the pre-created anchors. Outside of CREATE/ALTER + * EXTENSION we block CREATE/ALTER/RENAME/OWNER on them entirely so + * they remain pure structural anchors with all configuration in + * GUCs. DROP is blocked by core via extension membership. + * + * Additionally: + * - CREATE SERVER must specify TYPE 'rest'; TYPE 'postgres' and + * 'object_store' are reserved for the built-in servers. + * - Renaming any iceberg_catalog server is blocked (dependent tables + * record the server name as a string option in ftoptions). + * - For user-created REST servers, ALTER SERVER OPTIONS may not change + * rest_endpoint while dependent iceberg tables exist. + */ +bool +ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, + void *arg) +{ + Node *parsetree = processUtilityParams->plannedStmt->utilityStmt; + + if (creating_extension) + return false; + + if (IsA(parsetree, CreateForeignServerStmt)) + { + CreateForeignServerStmt *stmt = (CreateForeignServerStmt *) parsetree; + + if (stmt->fdwname == NULL || + strcmp(stmt->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + + if (IsCatalogOwnedByExtension(stmt->servername)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for the extension-owned catalog", + stmt->servername), + errhint("Choose a different server name."))); + + RejectIfBuiltinCatalogServerName(stmt->servername); + + if (stmt->servertype != NULL && + (pg_strcasecmp(stmt->servertype, POSTGRES_CATALOG_NAME) == 0 || + pg_strcasecmp(stmt->servertype, OBJECT_STORE_CATALOG_NAME) == 0)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create iceberg_catalog server with TYPE '%s'", + stmt->servertype), + errhint("Use the built-in \"%s\" or \"%s\" catalogs, " + "or create a server of type 'rest'.", + POSTGRES_CATALOG_NAME, OBJECT_STORE_CATALOG_NAME))); + + if (stmt->servertype == NULL || + pg_strcasecmp(stmt->servertype, REST_CATALOG_NAME) != 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("iceberg_catalog server requires TYPE 'rest'"))); + } + else if (IsA(parsetree, RenameStmt)) + { + RenameStmt *stmt = (RenameStmt *) parsetree; + + if (stmt->renameType != OBJECT_FOREIGN_SERVER) + return false; + + if (IsCatalogOwnedByExtension(stmt->newname)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("server name \"%s\" is reserved for the extension-owned catalog", + stmt->newname), + errhint("Choose a different server name."))); + + RejectIfBuiltinCatalogServerName(stmt->newname); + + /* + * Renaming an iceberg_catalog server is blocked because dependent + * iceberg tables store the server name as a string option + * (catalog='') in pg_foreign_table.ftoptions. A rename would + * silently break those references. + */ + ForeignServer *server = GetForeignServerByName(strVal(stmt->object), + true); + + if (server != NULL) + { + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot rename iceberg_catalog server \"%s\"", + strVal(stmt->object)), + errhint("Drop and recreate the server with the new name."))); + } + } + else if (IsA(parsetree, AlterOwnerStmt)) + { + AlterOwnerStmt *stmt = (AlterOwnerStmt *) parsetree; + + if (stmt->objectType != OBJECT_FOREIGN_SERVER) + return false; + + const char *serverName = strVal(stmt->object); + + RejectIfModifyingBuiltinCatalogServer(serverName, "change owner of"); + } + else if (IsA(parsetree, AlterForeignServerStmt)) + { + AlterForeignServerStmt *stmt = (AlterForeignServerStmt *) parsetree; + + ForeignServer *server = GetForeignServerByName(stmt->servername, true); + + if (server == NULL) + return false; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0) + return false; + + /* + * The three built-in servers are immutable structural anchors. + * Configuration for the built-in catalogs lives in GUCs; reject any + * option, version, or other ALTER change so we never end up with + * hidden server-side state that pg_dump cannot round-trip cleanly for + * an extension-owned object. + */ + RejectIfModifyingBuiltinCatalogServer(stmt->servername, "alter"); + + /* + * Changing rest_endpoint on a user-created server with dependent + * iceberg tables (writable or read-only) would silently point them at + * a different REST catalog, breaking the metadata chain. + */ + ListCell *lc; + + foreach(lc, stmt->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (pg_strcasecmp(def->defname, "rest_endpoint") == 0 && + ServerHasDependentRestIcebergTable(server->serverid)) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot change \"rest_endpoint\" on server \"%s\" " + "because it has dependent iceberg tables", + stmt->servername), + errhint("Drop the dependent tables first, or create a " + "new server with the desired endpoint."))); + } + } + } + + return false; +} + + + +/* + * ApplyGUCDefaults populates opts with the current GUC values. + * All string fields are pstrdup'd so the struct is self-contained. + */ +static void +ApplyGUCDefaults(RestCatalogOptions * opts) +{ + char *defaultLocationPrefix = GetIcebergDefaultLocationPrefix(); + + opts->host = RestCatalogHost ? pstrdup(RestCatalogHost) : NULL; + opts->oauthHostPath = RestCatalogOauthHostPath ? pstrdup(RestCatalogOauthHostPath) : NULL; + opts->clientId = RestCatalogClientId ? pstrdup(RestCatalogClientId) : NULL; + opts->clientSecret = RestCatalogClientSecret ? pstrdup(RestCatalogClientSecret) : NULL; + opts->scope = RestCatalogScope ? pstrdup(RestCatalogScope) : NULL; + opts->authType = RestCatalogAuthType; + opts->enableVendedCredentials = RestCatalogEnableVendedCredentials; + opts->locationPrefix = defaultLocationPrefix ? pstrdup(defaultLocationPrefix) : NULL; +} + + +/* + * ApplyServerOptionOverrides overrides the GUC-derived defaults in opts + * with any options explicitly set on the foreign server. + */ +static void +ApplyServerOptionOverrides(RestCatalogOptions * opts, ForeignServer *server) +{ + ListCell *lc; + + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + const IcebergCatalogOptionDesc *desc = FindCatalogOptionDesc(def->defname); + + if (desc != NULL) + ApplyCatalogOptionValue(opts, desc, def); + } +} + + +/* + * ValidateRestCatalogOptions checks that the resolved options have + * the minimum required fields (e.g. rest_endpoint). + */ +static void +ValidateRestCatalogOptions(const RestCatalogOptions * opts, const char *catalog) +{ + if (opts->host == NULL || opts->host[0] == '\0') + ereport(ERROR, + (errcode(ERRCODE_FDW_OPTION_NAME_NOT_FOUND), + errmsg("\"rest_endpoint\" is not configured for REST catalog \"%s\"", + catalog), + errhint("Set the pg_lake_iceberg.rest_catalog_host GUC or " + "the \"rest_endpoint\" option on the server."))); +} + + +/* + * Build RestCatalogOptions for an iceberg_catalog server. + * + * The built-in pg_lake_rest_catalog server and any user-created + * iceberg_catalog REST server go through the same path: GUC defaults + * first, then server-level options applied on top. ALTER SERVER OPTIONS + * is blocked on the built-in server, so in practice its option set is + * always empty and the GUC defaults survive untouched -- which is + * exactly the historical "GUCs-only built-in REST" behavior, now + * reached through a single code path. + * + * `userVisibleCatalog` is the short identifier the user typed + * (e.g. "rest" or a user server name); it is what we store in + * opts->catalog so that error messages, the cross-catalog DML check, + * and the token cache key all stay in user-facing terms. The long + * built-in server name never leaks past this function. + */ +static RestCatalogOptions * +BuildRestCatalogOptionsFromServer(const char *serverName, + const char *userVisibleCatalog) +{ + ForeignServer *server = GetForeignServerByName(serverName, false); + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + Assert(strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0); + + RestCatalogOptions *opts = palloc0(sizeof(RestCatalogOptions)); + + opts->serverOid = server->serverid; + opts->catalog = pstrdup(userVisibleCatalog); + ApplyGUCDefaults(opts); + ApplyServerOptionOverrides(opts, server); + ValidateRestCatalogOptions(opts, userVisibleCatalog); + return opts; +} + + +/* + * ResolveRestCatalogOptions builds RestCatalogOptions for the catalog + * identifier the user typed. The short reserved names ('postgres', + * 'object_store', 'rest') are mapped to their pre-created built-in + * server names; all other inputs are looked up verbatim. + */ +RestCatalogOptions * +ResolveRestCatalogOptions(const char *catalog) +{ + const char *serverName = ResolveCatalogServerName(catalog); + + return BuildRestCatalogOptionsFromServer(serverName, catalog); +} + + +/* + * GetRestCatalogOptionsForRelation returns the REST catalog options for + * the given relation. The catalog option value is used as the server + * name (or built-in 'rest' literal). + */ +RestCatalogOptions * +GetRestCatalogOptionsForRelation(Oid relationId) +{ + ForeignTable *foreignTable = GetForeignTable(relationId); + char *catalog = GetStringOption(foreignTable->options, "catalog", false); + + if (catalog == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog option is not set for relation %u", relationId))); + + return ResolveRestCatalogOptions(catalog); +} + + +/* + * CopyRestCatalogOptions deep-copies a RestCatalogOptions into the given + * memory context. All string fields are duplicated so the result is + * self-contained and independent of the source's lifetime. + */ +RestCatalogOptions * +CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src) +{ + MemoryContext oldctx = MemoryContextSwitchTo(dst); + RestCatalogOptions *copy = palloc0(sizeof(RestCatalogOptions)); + + copy->serverOid = src->serverOid; + copy->catalog = pstrdup(src->catalog); + copy->host = pstrdup(src->host); + copy->oauthHostPath = src->oauthHostPath ? pstrdup(src->oauthHostPath) : NULL; + copy->clientId = src->clientId ? pstrdup(src->clientId) : NULL; + copy->clientSecret = src->clientSecret ? pstrdup(src->clientSecret) : NULL; + copy->scope = src->scope ? pstrdup(src->scope) : NULL; + copy->locationPrefix = src->locationPrefix ? pstrdup(src->locationPrefix) : NULL; + copy->catalogName = src->catalogName ? pstrdup(src->catalogName) : NULL; + copy->authType = src->authType; + copy->enableVendedCredentials = src->enableVendedCredentials; + + MemoryContextSwitchTo(oldctx); + return copy; +} + + /* * StartStageRestCatalogIcebergTableCreate stages the creation of an iceberg table * in the rest catalog. On any failure, an error is raised. If the table exists, @@ -120,19 +804,22 @@ StartStageRestCatalogIcebergTableCreate(Oid relationId) const char *catalogName = GetRestCatalogName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); + char *postUrl = - psprintf(REST_CATALOG_TABLES, RestCatalogHost, + psprintf(REST_CATALOG_TABLES, opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - List *headers = PostHeadersWithAuth(); + List *headers = PostHeadersWithAuth(opts); - if (RestCatalogEnableVendedCredentials) + if (opts->enableVendedCredentials) { char *vendedCreds = pstrdup("X-Iceberg-Access-Delegation: vended-credentials"); headers = lappend(headers, vendedCreds); } - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body->data, headers); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_POST, postUrl, body->data, + headers); if (httpResult.status != 200) { @@ -209,8 +896,9 @@ FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSche const char *catalogName = GetRestCatalogName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); const char *relationName = GetRestCatalogTableName(relationId); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); - appendStringInfo(location, "%s/%s/%s/%s/%d", IcebergDefaultLocationPrefix, catalogName, namespaceName, relationName, relationId); + appendStringInfo(location, "%s/%s/%s/%s/%d", opts->locationPrefix, catalogName, namespaceName, relationName, relationId); appendJsonString(body, "location", location->data); appendStringInfoChar(body, '}'); /* end set-location */ @@ -256,7 +944,7 @@ FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSche * allowed locations as part of the namespace. */ void -RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceName) +RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -264,9 +952,10 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - RestCatalogHost, URLEncodePath(catalogName), + opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth()); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(opts)); switch (httpResult.status) { @@ -281,7 +970,7 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam /* * Does not exists, we'll create it. */ - CreateNamespaceOnRestCatalog(catalogName, namespaceName); + CreateNamespaceOnRestCatalog(opts, catalogName, namespaceName); break; } @@ -300,7 +989,7 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam if (serverAllowedLocation) { const char *defaultAllowedLocation = - psprintf("%s/%s/%s", IcebergDefaultLocationPrefix, catalogName, namespaceName); + psprintf("%s/%s/%s", opts->locationPrefix, catalogName, namespaceName); /* @@ -346,7 +1035,7 @@ RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceNam * namespace exists when creating a table in the given namespace. */ void -ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceName) +ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* * First, we need to check if the namespace already exists in Rest Catalog @@ -354,10 +1043,10 @@ ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceN */ char *getUrl = psprintf(REST_CATALOG_NAMESPACE_NAME, - RestCatalogHost, URLEncodePath(catalogName), + opts->host, URLEncodePath(catalogName), URLEncodePath(namespaceName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, GetHeadersWithAuth()); - + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, + GetHeadersWithAuth(opts)); /* namespace not found */ if (httpResult.status == 404) @@ -389,7 +1078,9 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) const char *relationName = GetRestCatalogTableName(relationId); const char *namespaceName = GetRestCatalogNamespace(relationId); - return GetMetadataLocationFromRestCatalog(restCatalogName, namespaceName, relationName); + RestCatalogOptions *opts = GetRestCatalogOptionsForRelation(relationId); + + return GetMetadataLocationFromRestCatalog(opts, restCatalogName, namespaceName, relationName); } @@ -397,14 +1088,14 @@ GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId) * Gets the metadata location for a relation from the external catalog. */ char * -GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *namespaceName, const char *relationName) +GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCatalogName, const char *namespaceName, const char *relationName) { char *getUrl = psprintf(REST_CATALOG_TABLE, - RestCatalogHost, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); + opts->host, URLEncodePath(restCatalogName), URLEncodePath(namespaceName), URLEncodePath(relationName)); - List *headers = GetHeadersWithAuth(); - HttpResult hr = SendRequestToRestCatalog(HTTP_GET, getUrl, NULL, headers); + List *headers = GetHeadersWithAuth(opts); + HttpResult hr = SendRequestToRestCatalog(opts, HTTP_GET, getUrl, NULL, headers); if (hr.status != 200) { @@ -425,7 +1116,7 @@ GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *name * an error is raised. */ static void -CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) +CreateNamespaceOnRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName) { /* POST create */ StringInfoData body; @@ -449,10 +1140,11 @@ CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) appendStringInfoChar(&body, '}'); /* close body */ char *postUrl = - psprintf(REST_CATALOG_NAMESPACE, RestCatalogHost, + psprintf(REST_CATALOG_NAMESPACE, opts->host, URLEncodePath(catalogName)); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, postUrl, body.data, PostHeadersWithAuth()); + HttpResult httpResult = SendRequestToRestCatalog(opts, HTTP_POST, postUrl, body.data, + PostHeadersWithAuth(opts)); if (httpResult.status != 200) { @@ -464,11 +1156,11 @@ CreateNamespaceOnRestCatalog(const char *catalogName, const char *namespaceName) * Creates the headers for a POST request with authentication. */ List * -PostHeadersWithAuth(void) +PostHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken)), + return list_make3(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken)), pstrdup("Accept: application/json"), pstrdup("Content-Type: application/json")); } @@ -479,11 +1171,11 @@ PostHeadersWithAuth(void) * Creates the headers for a DELETE request with authentication. */ List * -DeleteHeadersWithAuth(void) +DeleteHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken))); + return list_make1(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken))); } @@ -492,11 +1184,11 @@ DeleteHeadersWithAuth(void) * Creates the headers for a GET request with authentication. */ static List * -GetHeadersWithAuth(void) +GetHeadersWithAuth(RestCatalogOptions * opts) { bool forceRefreshToken = false; - return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(forceRefreshToken)), + return list_make2(psprintf("Authorization: Bearer %s", GetRestCatalogAccessToken(opts, forceRefreshToken)), pstrdup("Accept: application/json")); } @@ -538,12 +1230,102 @@ ReportHTTPError(HttpResult httpResult, int level) /* -* Gets an access token from rest catalog using client credentials that are -* configured via GUC variables. Caches the token until it is about to expire. -*/ + * Syscache invalidation callback for pg_foreign_server changes. + * Any ALTER/DROP SERVER blows away the entire token cache so stale + * credentials are never reused. The cache is rebuilt lazily on the + * next token lookup. + * + * We ignore hashvalue and reset the whole cache rather than selectively + * invalidating a single server's entry. With a handful of servers and + * infrequent ALTER SERVER, the cost of a few extra OAuth round-trips is + * negligible compared to the complexity of tracking per-entry hash + * values for targeted invalidation. + */ +static void +InvalidateRestTokenCache(Datum arg, int cacheid, uint32 hashvalue) +{ + if (RestCatalogTokenCache == NULL) + return; + + MemoryContextReset(RestTokenCacheCtx); + RestCatalogTokenCache = NULL; +} + + +/* + * Initialize the per-catalog token cache hash table if needed. + * + * TokenCacheCallbackRegistered is separate from RestCatalogTokenCache because + * the callback must be registered exactly once per backend lifetime + * (CacheRegisterSyscacheCallback appends to a fixed-size array), while + * RestCatalogTokenCache is reset to NULL on every invalidation. + */ +static bool TokenCacheCallbackRegistered = false; + +static void +InitTokenCacheIfNeeded(void) +{ + if (!TokenCacheCallbackRegistered) + { + CacheRegisterSyscacheCallback(FOREIGNSERVEROID, + InvalidateRestTokenCache, + (Datum) 0); + TokenCacheCallbackRegistered = true; + } + + if (RestCatalogTokenCache != NULL) + return; + + if (RestTokenCacheCtx == NULL) + RestTokenCacheCtx = AllocSetContextCreate(CacheMemoryContext, + "RestTokenCacheCtx", + ALLOCSET_DEFAULT_SIZES); + + HASHCTL ctl; + + memset(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RestCatalogTokenCacheEntry); + ctl.hcxt = RestTokenCacheCtx; + + RestCatalogTokenCache = hash_create("REST Catalog Token Cache", + 8, &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); +} + + +/* + * Gets an access token from rest catalog. Caches the token per catalog + * (keyed by iceberg_catalog server OID) until it is about to expire. + */ static char * -GetRestCatalogAccessToken(bool forceRefreshToken) +GetRestCatalogAccessToken(RestCatalogOptions * opts, bool forceRefreshToken) { + if (opts == NULL) + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("REST catalog options must not be NULL when fetching access token"))); + + /* + * Every resolved RestCatalogOptions originates from + * BuildRestCatalogOptionsFromServer, which always sets serverOid. A + * missing OID would silently funnel every catalog into the same cache + * slot, so trap it loudly here. + */ + Assert(OidIsValid(opts->serverOid)); + + InitTokenCacheIfNeeded(); + + bool found = false; + RestCatalogTokenCacheEntry *entry = + hash_search(RestCatalogTokenCache, &opts->serverOid, HASH_ENTER, &found); + + if (!found) + { + entry->accessToken = NULL; + entry->accessTokenExpiry = 0; + } + /* * Calling initial time or token will expire in 1 minute, fetch a new * token. @@ -551,74 +1333,78 @@ GetRestCatalogAccessToken(bool forceRefreshToken) TimestampTz now = GetCurrentTimestamp(); const int MINUTE_IN_MSECS = 60 * 1000; - if (forceRefreshToken || RestCatalogAccessTokenExpiry == 0 || - !TimestampDifferenceExceeds(now, RestCatalogAccessTokenExpiry, MINUTE_IN_MSECS)) + if (forceRefreshToken || entry->accessTokenExpiry == 0 || + !TimestampDifferenceExceeds(now, entry->accessTokenExpiry, MINUTE_IN_MSECS)) { - if (RestCatalogAccessToken) + if (entry->accessToken) { - pfree(RestCatalogAccessToken); - RestCatalogAccessToken = NULL; + pfree(entry->accessToken); + entry->accessToken = NULL; + entry->accessTokenExpiry = 0; } char *accessToken = NULL; int expiresIn = 0; - FetchRestCatalogAccessToken(&accessToken, &expiresIn); + FetchRestCatalogAccessToken(opts, &accessToken, &expiresIn); - RestCatalogAccessToken = MemoryContextStrdup(TopMemoryContext, accessToken); - RestCatalogAccessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in - * seconds */ + entry->accessToken = MemoryContextStrdup(RestTokenCacheCtx, accessToken); + entry->accessTokenExpiry = now + (int64_t) expiresIn * 1000000; /* expiresIn is in + * seconds */ } - Assert(RestCatalogAccessToken != NULL); + Assert(entry->accessToken != NULL); - return RestCatalogAccessToken; + return entry->accessToken; } /* -* Fetches an access token from rest catalog using client credentials that are -* configured via GUC variables. +* Fetches an access token from rest catalog using the given options. */ static void -FetchRestCatalogAccessToken(char **accessToken, int *expiresIn) +FetchRestCatalogAccessToken(RestCatalogOptions * opts, char **accessToken, int *expiresIn) { - if (!RestCatalogHost || !*RestCatalogHost) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_host should be set"))); - if (!RestCatalogClientSecret || !*RestCatalogClientSecret) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_client_secret should be set"))); + Assert(opts->host != NULL && opts->host[0] != '\0'); + if (!opts->clientSecret || !*opts->clientSecret) + ereport(ERROR, + (errmsg("REST catalog client_secret is not configured"), + errhint("Set the \"client_secret\" option on the server " + "or the pg_lake_iceberg.rest_catalog_client_secret GUC."))); - char *accessTokenUrl = RestCatalogOauthHostPath; + char *accessTokenUrl = opts->oauthHostPath; /* - * if pg_lake_iceberg.rest_catalog_oauth_host_path is not set, use - * Polaris' default oauth token endpoint + * if oauthHostPath is not set, use Polaris' default oauth token endpoint */ - if (*accessTokenUrl == '\0') - accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, RestCatalogHost); + if (!accessTokenUrl || *accessTokenUrl == '\0') + accessTokenUrl = psprintf(REST_CATALOG_AUTH_TOKEN_PATH, opts->host); /* Form-encoded body */ StringInfoData body; initStringInfo(&body); appendStringInfo(&body, "grant_type=client_credentials&scope=%s", - URLEncodePath(RestCatalogScope)); + URLEncodePath(opts->scope)); /* Headers */ List *headers = NIL; - if (RestCatalogAuthType == REST_CATALOG_AUTH_TYPE_HORIZON) + if (opts->authType == REST_CATALOG_AUTH_TYPE_HORIZON) { /* Put secret in body (ignore client ID) */ - appendStringInfo(&body, "&client_secret=%s", URLEncodePath(RestCatalogClientSecret)); + appendStringInfo(&body, "&client_secret=%s", URLEncodePath(opts->clientSecret)); } else { - if (!RestCatalogClientId || !*RestCatalogClientId) - ereport(ERROR, (errmsg("pg_lake_iceberg.rest_catalog_client_id should be set"))); + if (!opts->clientId || !*opts->clientId) + ereport(ERROR, + (errmsg("REST catalog client_id is not configured"), + errhint("Set the \"client_id\" option on the server " + "or the pg_lake_iceberg.rest_catalog_client_id GUC."))); /* Build Authorization: Basic */ - char *encodedAuth = EncodeBasicAuth(RestCatalogClientId, RestCatalogClientSecret); + char *encodedAuth = EncodeBasicAuth(opts->clientId, opts->clientSecret); char *authHeader = psprintf("Authorization: Basic %s", encodedAuth); headers = lappend(headers, authHeader); @@ -626,8 +1412,14 @@ FetchRestCatalogAccessToken(char **accessToken, int *expiresIn) headers = lappend(headers, "Content-Type: application/x-www-form-urlencoded"); - /* POST */ - HttpResult httpResponse = SendRequestToRestCatalog(HTTP_POST, accessTokenUrl, body.data, headers); + /* + * Pass NULL opts so SendRequestToRestCatalog skips the 419 token-refresh + * retry branch. Otherwise a 419 here would call + * GetRestCatalogAccessToken -> FetchRestCatalogAccessToken -> + * SendRequestToRestCatalog in an infinite loop. + */ + HttpResult httpResponse = SendRequestToRestCatalog(NULL, HTTP_POST, accessTokenUrl, + body.data, headers); if (httpResponse.status != 200) ereport(ERROR, @@ -835,10 +1627,16 @@ GetRestCatalogNamespace(Oid relationId) /* -* Readable rest catalog tables always use the catalog_name option -* as the catalog name in the external catalog. Writable rest catalog tables -* use the current database name as the catalog name. -*/ + * Returns the catalog name to use for REST API calls. + * + * Writable tables always use the current database name so that a + * subsequent ALTER SERVER ? ADD/SET catalog_name cannot silently + * re-route an existing table to a different REST namespace. + * + * Read-only tables always have catalog_name baked into their table + * options at CREATE TABLE time (inherited from the server option or + * defaulted to the database name). + */ char * GetRestCatalogName(Oid relationId) { @@ -847,27 +1645,16 @@ GetRestCatalogName(Oid relationId) Assert(catalogType == REST_CATALOG_READ_ONLY || catalogType == REST_CATALOG_READ_WRITE); - if (catalogType == REST_CATALOG_READ_ONLY) - { + if (catalogType == REST_CATALOG_READ_WRITE) + return get_database_name(MyDatabaseId); - Assert(GetIcebergCatalogType(relationId) == REST_CATALOG_READ_ONLY || - GetIcebergCatalogType(relationId) == REST_CATALOG_READ_WRITE); - - ForeignTable *foreignTable = GetForeignTable(relationId); - List *options = foreignTable->options; - - char *catalogName = GetStringOption(options, "catalog_name", false); - - /* user provided the custom catalog name */ - if (!catalogName) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("catalog_name option is required for rest catalog iceberg tables"))); + ForeignTable *foreignTable = GetForeignTable(relationId); + char *catalogName = GetStringOption(foreignTable->options, "catalog_name", false); + if (catalogName != NULL) return catalogName; - } - return get_database_name(MyDatabaseId); + elog(ERROR, "catalog_name missing on read-only REST catalog table %u", relationId); } @@ -1139,9 +1926,14 @@ ClassifyRestCatalogRequestRetry(long status, int maxRetry, int retryNo) * cancel backend). This function can be called at post-commit hook, * so normally we wouldn't want any errors to happen, but then * Postgres already prevents post-commit backends to receive signals. + * + * When opts is non-NULL the retry callback can force-refresh the + * access token and patch the Authorization header on a 419 response. + * Pass opts = NULL for the token-fetch request itself to avoid recursion. */ HttpResult -SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers) +SendRequestToRestCatalog(RestCatalogOptions * opts, HttpMethod method, const char *url, + const char *body, List *headers) { const int MAX_HTTP_RETRY_FOR_REST_CATALOG = 3; @@ -1169,7 +1961,7 @@ SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, L * new token. */ bool forceRefreshToken = true; - char *freshToken = GetRestCatalogAccessToken(forceRefreshToken); + char *freshToken = GetRestCatalogAccessToken(opts, forceRefreshToken); UpdateAuthorizationHeader(headers, freshToken); continue; diff --git a/pg_lake_iceberg/src/test/rest_catalog.c b/pg_lake_iceberg/src/test/rest_catalog.c index 791ceb2a..93a96e15 100644 --- a/pg_lake_iceberg/src/test/rest_catalog.c +++ b/pg_lake_iceberg/src/test/rest_catalog.c @@ -37,6 +37,8 @@ register_namespace_to_rest_catalog(PG_FUNCTION_ARGS) char *catalogName = text_to_cstring(PG_GETARG_TEXT_P(0)); char *namespaceName = text_to_cstring(PG_GETARG_TEXT_P(1)); - RegisterNamespaceToRestCatalog(catalogName, namespaceName); + RestCatalogOptions *opts = ResolveRestCatalogOptions(REST_CATALOG_NAME); + + RegisterNamespaceToRestCatalog(opts, catalogName, namespaceName); PG_RETURN_VOID(); } diff --git a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h index d7e2d22f..e76afbd5 100644 --- a/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h +++ b/pg_lake_table/include/pg_lake/transaction/track_iceberg_metadata_changes.h @@ -61,3 +61,4 @@ extern PGDLLEXPORT void ResetRestCatalogRequests(void); extern PGDLLEXPORT HTAB *GetTrackedIcebergMetadataOperations(void); extern PGDLLEXPORT bool HasAnyTrackedIcebergMetadataChanges(void); extern PGDLLEXPORT bool IsIcebergTableCreatedInCurrentTransaction(Oid relation); +extern PGDLLEXPORT void BindRelationToXactRestCatalog(Oid relationId); diff --git a/pg_lake_table/src/ddl/create_table.c b/pg_lake_table/src/ddl/create_table.c index 510cd7d4..df791310 100644 --- a/pg_lake_table/src/ddl/create_table.c +++ b/pg_lake_table/src/ddl/create_table.c @@ -21,9 +21,11 @@ #include "access/table.h" #include "access/tableam.h" #include "access/relation.h" +#include "catalog/dependency.h" #include "catalog/namespace.h" #include "catalog/pg_attribute.h" #include "catalog/pg_class.h" +#include "catalog/pg_foreign_server.h" #include "commands/dbcommands.h" #include "commands/defrem.h" #include "commands/extension.h" @@ -90,6 +92,8 @@ static bool IsJsonOrCSVBackedTable(PgLakeTableType tableType, List *options); static void ErrorIfUnsupportedColumnTypeForJsonOrCSVTables(List *columnDefList); static void ErrorIfUsingGeometryWithoutSpatialAnalytics(List *columnDefList); static void ErrorIfUnsupportedLakeTable(CreateForeignTableStmt *createStmt); +static void ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt); +static void RecordIcebergCatalogServerDependency(Oid relationId, List *options); static void ErrorIfWritableTableWithReservedColumnName(List *columnDefList, PgLakeTableType tableType); static void ErrorIfInvalidFilenameColumn(List *columnDefList); static bool IsConflictingColumnNameForReadParquet(const char *columnName); @@ -324,6 +328,9 @@ ErrorIfUsingGeometryWithoutSpatialAnalytics(List *columnDefList) * * We check for unsupported features in the table definition, such as unsupported URLs or unsupported * combinations such as writable tables without column definitions. +* +* Also blocks CREATE FOREIGN TABLE on iceberg_catalog servers, which have no +* handler. Tables should be created via CREATE TABLE ... USING iceberg instead. */ bool ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *arg) @@ -339,6 +346,8 @@ ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *ar CreateForeignTableStmt *createStmt = (CreateForeignTableStmt *) plannedStmt->utilityStmt; + ErrorIfCreateForeignTableOnIcebergCatalog(createStmt); + if (!IsCreateLakeTable(createStmt)) { /* not a lake table */ @@ -351,6 +360,72 @@ ErrorUnsupportedCreatePgLakeTableHandler(ProcessUtilityParams * params, void *ar } +/* + * ErrorIfCreateForeignTableOnIcebergCatalog blocks CREATE FOREIGN TABLE + * when the target server uses the iceberg_catalog FDW, which has no handler. + */ +static void +ErrorIfCreateForeignTableOnIcebergCatalog(CreateForeignTableStmt *createStmt) +{ + ForeignServer *server = + GetForeignServerByName(createStmt->servername, true); + + if (server == NULL) + return; + + ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid); + + if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) == 0) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot create foreign tables on iceberg_catalog server \"%s\"", + createStmt->servername), + errhint("Use CREATE TABLE ... USING iceberg WITH (catalog = '%s') instead.", + createStmt->servername))); +} + + +/* + * RecordIcebergCatalogServerDependency records a DEPENDENCY_NORMAL from + * the iceberg table to its catalog server in pg_depend, so that + * DROP SERVER is blocked while dependent tables exist (and + * DROP SERVER CASCADE drops them). + * + * Both user-created iceberg_catalog servers and the three pre-created + * built-in catalog servers get a dependency entry: the short reserved + * names ('rest', 'postgres', 'object_store') are mapped to the + * corresponding built-in server (e.g. 'pg_lake_rest_catalog') via + * ResolveCatalogServerName. + */ +static void +RecordIcebergCatalogServerDependency(Oid relationId, List *options) +{ + char *catalog = GetStringOption(options, "catalog", false); + + if (catalog == NULL) + return; + + const char *serverName = ResolveCatalogServerName(catalog); + ForeignServer *server = GetForeignServerByName(serverName, true); + + if (server == NULL) + return; + + ObjectAddress myself; + ObjectAddress referenced; + + myself.classId = RelationRelationId; + myself.objectId = relationId; + myself.objectSubId = 0; + + referenced.classId = ForeignServerRelationId; + referenced.objectId = server->serverid; + referenced.objectSubId = 0; + + recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL); +} + + /* * ErrorIfUnsupportedLakeTable is a helper function for checking unsupported features * in CREATE FOREIGN TABLE statements that are pg_lake tables. @@ -659,10 +734,55 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) createStmt->options = lappend(createStmt->options, defaultCatalog); } + else + { + /* + * The pre-created built-in catalog servers (pg_lake_rest_catalog, + * pg_lake_postgres_catalog, pg_lake_object_store_catalog) are + * internal anchors and must not be addressable via the user-facing + * catalog= option. Users say catalog='rest' / 'postgres' / + * 'object_store' and we map short -> long internally. + */ + char *catalogVal = strVal(catalogOption->arg); + + if (IsBuiltinCatalogServerName(catalogVal)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("catalog name \"%s\" is reserved for an internal " + "pg_lake_iceberg catalog server", + catalogVal), + errhint("Use catalog='%s', '%s', or '%s' instead.", + REST_CATALOG_NAME, + POSTGRES_CATALOG_NAME, + OBJECT_STORE_CATALOG_NAME))); + } bool hasRestCatalogOption = HasRestCatalogTableOption(createStmt->options); bool hasObjectStoreCatalogOption = HasObjectStoreCatalogTableOption(createStmt->options); + /* + * For user-created iceberg_catalog servers, verify that the current user + * has USAGE privilege on the server. Built-in catalog names ('rest', + * 'postgres', 'object_store') have no backing server object and skip this + * check — access is controlled by the lake_write role instead. + */ + if (hasRestCatalogOption) + { + char *catalog = GetStringOption(createStmt->options, "catalog", false); + + if (!IsCatalogOwnedByExtension(catalog)) + { + ForeignServer *server = GetForeignServerByName(catalog, false); + AclResult aclresult = object_aclcheck(ForeignServerRelationId, + server->serverid, + GetUserId(), + ACL_USAGE); + + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FOREIGN_SERVER, catalog); + } + } + if (hasObjectStoreCatalogOption || hasRestCatalogOption) { Oid namespaceId = RangeVarGetAndCheckCreationNamespace(createStmt->base.relation, NoLock, NULL); @@ -722,7 +842,19 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (catalogNameProvided == NULL && hasExternalCatalogReadOnlyOption) { - catalogName = get_database_name(MyDatabaseId); + if (hasRestCatalogOption) + { + char *catalogOptionValue = + GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + catalogName = opts->catalogName ? pstrdup(opts->catalogName) + : get_database_name(MyDatabaseId); + } + else + catalogName = get_database_name(MyDatabaseId); + createStmt->options = lappend(createStmt->options, makeDefElem("catalog_name", (Node *) makeString(catalogName), -1)); @@ -734,10 +866,14 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (hasRestCatalogOption && hasExternalCatalogReadOnlyOption) { - ErrorIfRestNamespaceDoesNotExist(catalogName, catalogNamespace); + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + ErrorIfRestNamespaceDoesNotExist(opts, catalogName, catalogNamespace); metadataLocation = - GetMetadataLocationFromRestCatalog(catalogName, catalogNamespace, catalogTableName); + GetMetadataLocationFromRestCatalog(opts, catalogName, catalogNamespace, catalogTableName); } else if (hasObjectStoreCatalogOption && hasExternalCatalogReadOnlyOption) { @@ -752,12 +888,11 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) if (!hasExternalCatalogReadOnlyOption) { /* - * For writable object store catalog tables, we need to continue - * with the regular iceberg table creation process. We only fill - * in the catalog options here. Other than that, we simply check - * if user provided any catalog options. That's not allowed, - * writable tables only inherit from the database name, schema - * name, and table name. + * Writable tables always derive catalog_name, catalog_namespace, + * and catalog_table_name from the database name, schema name, and + * table name. Explicit catalog options on the table are + * rejected, and the server must not have catalog_name set either, + * since that would conflict with the derived values. */ if (catalogNamespaceProvided != NULL || catalogTableNameProvided != NULL || @@ -767,6 +902,19 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) errmsg("writable %s catalog iceberg tables do not " "allow explicit catalog options", hasObjectStoreCatalogOption ? OBJECT_STORE_CATALOG_NAME : REST_CATALOG_NAME))); } + + if (hasRestCatalogOption) + { + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + if (opts->catalogName != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("writable REST catalog tables cannot use a server " + "with catalog_name set"))); + } } else if (createStmt->base.tableElts == NIL && hasExternalCatalogReadOnlyOption) { @@ -792,6 +940,10 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) PgLakeCommonParentProcessUtility(params); + Oid readOnlyRelId = RangeVarGetRelid(createStmt->base.relation, NoLock, false); + + RecordIcebergCatalogServerDependency(readOnlyRelId, createStmt->options); + return true; } } @@ -849,6 +1001,18 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) "tables"); } + if (hasRestCatalogOption && locationOption == NULL && + !HasReadOnlyOption(createStmt->options)) + { + char *catalogOptionValue = + GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + if (opts->locationPrefix != NULL) + defaultLocationPrefix = opts->locationPrefix; + } + /* * We will set the location by using the default location prefix when user * does not specify the location but already set default locatipn prefix. @@ -885,6 +1049,8 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) /* the table is now created, get its OID */ Oid relationId = RangeVarGetRelid(createStmt->base.relation, NoLock, false); + RecordIcebergCatalogServerDependency(relationId, createStmt->options); + char *location; if (locationOption != NULL) @@ -942,7 +1108,11 @@ ProcessCreateIcebergTableFromForeignTableStmt(ProcessUtilityParams * params) * database name. We normally encode that in GetRestCatalogName() * etc., but here we need to do it early before the table is created. */ - RegisterNamespaceToRestCatalog(get_database_name(MyDatabaseId), + char *catalogOptionValue = GetStringOption(createStmt->options, "catalog", false); + RestCatalogOptions *opts = + ResolveRestCatalogOptions(catalogOptionValue); + + RegisterNamespaceToRestCatalog(opts, get_database_name(MyDatabaseId), get_namespace_name(namespaceId)); } diff --git a/pg_lake_table/src/fdw/option.c b/pg_lake_table/src/fdw/option.c index fca5c637..7a5984a2 100644 --- a/pg_lake_table/src/fdw/option.c +++ b/pg_lake_table/src/fdw/option.c @@ -33,6 +33,7 @@ #include "catalog/pg_foreign_table.h" #include "commands/defrem.h" #include "commands/extension.h" +#include "foreign/foreign.h" #include "pg_lake/iceberg/catalog.h" #include "pg_lake/partitioning/partition_by_parser.h" #include "pg_lake/permissions/roles.h" @@ -764,11 +765,7 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) { char *icebergCatalogName = defGetString(def); - /* - * We only accept "rest" and "postgres" for now. If not provided, - * assume "postgres" by default. Don't allow anything. - */ - if (pg_strncasecmp(icebergCatalogName, REST_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + if (IsRestCatalog(icebergCatalogName)) { /* * at this point, we cannot tell whether it's read only or @@ -777,9 +774,8 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) */ icebergCatalogType = REST_CATALOG_READ_ONLY; } - else if (pg_strncasecmp(icebergCatalogName, OBJECT_STORE_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + else if (pg_strcasecmp(icebergCatalogName, OBJECT_STORE_CATALOG_NAME) == 0) { - /* * at this point, we cannot tell whether it's read only or * read write. We'll determine that later based on the @@ -787,13 +783,14 @@ pg_lake_iceberg_validator(PG_FUNCTION_ARGS) */ icebergCatalogType = OBJECT_STORE_READ_ONLY; } - else if (pg_strncasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME, strlen(icebergCatalogName)) == 0) + else if (pg_strcasecmp(icebergCatalogName, POSTGRES_CATALOG_NAME) == 0) icebergCatalogType = POSTGRES_CATALOG; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid catalog option: %s", icebergCatalogName), - errdetail("Only " REST_CATALOG_NAME " and " POSTGRES_CATALOG_NAME " are supported for now."))); + errdetail("Use \"rest\", \"object_store\", \"postgres\", " + "or the name of an iceberg_catalog server."))); } else if (catalog == ForeignTableRelationId && strcmp(def->defname, "read_only") == 0) { diff --git a/pg_lake_table/src/fdw/pg_lake_table.c b/pg_lake_table/src/fdw/pg_lake_table.c index d4107894..3f5cf9f4 100644 --- a/pg_lake_table/src/fdw/pg_lake_table.c +++ b/pg_lake_table/src/fdw/pg_lake_table.c @@ -99,6 +99,7 @@ #include "pg_lake/pgduck/write_data.h" #include "pg_lake/planner/restriction_collector.h" #include "pg_lake/storage/local_storage.h" +#include "pg_lake/transaction/track_iceberg_metadata_changes.h" #include "pg_lake/util/item_pointer_utils.h" #include "pg_lake/util/rel_utils.h" #include "pg_lake/util/string_utils.h" @@ -2205,6 +2206,8 @@ postgresBeginForeignModify(ModifyTableState *mtstate, if (eflags & EXEC_FLAG_EXPLAIN_ONLY) return; + BindRelationToXactRestCatalog(RelationGetRelid(resultRelInfo->ri_RelationDesc)); + /* Construct an execution state. */ fmstate = create_foreign_modify(resultRelInfo->ri_RelationDesc, resultRelInfo->ri_RangeTableIndex, diff --git a/pg_lake_table/src/fdw/writable_table.c b/pg_lake_table/src/fdw/writable_table.c index dcab98fe..f98252e3 100644 --- a/pg_lake_table/src/fdw/writable_table.c +++ b/pg_lake_table/src/fdw/writable_table.c @@ -1095,6 +1095,8 @@ AddQueryResultToTable(Oid relationId, char *readQuery, TupleDesc queryTupleDesc, { Assert(queryTupleDesc != NULL && queryTupleDesc->natts > 0); + BindRelationToXactRestCatalog(relationId); + int64 rowsProcessed = 0; ForeignTable *foreignTable = GetForeignTable(relationId); List *options = foreignTable->options; diff --git a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c index 2c3e0655..136befbd 100644 --- a/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c +++ b/pg_lake_table/src/transaction/track_iceberg_metadata_changes.c @@ -117,13 +117,22 @@ static int GetEffectiveMaxSnapshotAgeInSecs(Oid relationId); static HTAB *TrackedIcebergMetadataOperationsHash = NULL; /* -* Hash table to track rest catalog requests per relation within a transaction. -*/ -static HTAB *RestCatalogRequestsHash = NULL; + * Per-transaction context for REST catalog requests. Groups the request + * hash, pre-resolved catalog options, and the pre-allocated memory context + * used at XACT_EVENT_COMMIT time (where syscache lookups and large pallocs + * are forbidden). Allocated in TopTransactionContext and automatically + * freed at transaction end. Only one REST catalog server is allowed per + * transaction. + */ +typedef struct PgLakeXactRestCatalogContext +{ + HTAB *requestsHash; + MemoryContext commitContext; + RestCatalogOptions *catalogOpts; +} PgLakeXactRestCatalogContext; +static PgLakeXactRestCatalogContext * PgLakeXactRestCatalog = NULL; -/* some pre-allocated memory so we don't palloc() ever in XACT_COMMIT */ -static MemoryContext PgLakeXactCommitContext = NULL; /* * TrackIcebergMetadataChangesInTx tracks metadata changes for a given relation @@ -214,8 +223,7 @@ ResetTrackedIcebergMetadataOperation(void) void ResetRestCatalogRequests(void) { - RestCatalogRequestsHash = NULL; - PgLakeXactCommitContext = NULL; + PgLakeXactRestCatalog = NULL; } @@ -227,22 +235,24 @@ ResetRestCatalogRequests(void) void PostAllRestCatalogRequests(void) { - if (RestCatalogRequestsHash == NULL) + if (PgLakeXactRestCatalog == NULL) { return; } /* - * Switch to PgLakeXactCommitContext to avoid palloc() in XACT_COMMIT, as - * PgLakeXactCommitContext is pre-allocated before. + * Switch to commitContext to avoid palloc() in XACT_COMMIT, as + * commitContext is pre-allocated before. */ - MemoryContext oldContext = MemoryContextSwitchTo(PgLakeXactCommitContext); + MemoryContext oldContext = MemoryContextSwitchTo(PgLakeXactRestCatalog->commitContext); + + Assert(PgLakeXactRestCatalog->catalogOpts != NULL); /* - * We need to iterate over the RestCatalogRequestsHash twice: 1. First, we - * need to post the create table requests to create the iceberg tables in - * the rest catalog. 2. Then, we need to post all the other modifications - * (like adding snapshots, partition specs, etc.) + * We need to iterate over the requests hash twice: 1. First, we need to + * post the create table requests to create the iceberg tables in the rest + * catalog. 2. Then, we need to post all the other modifications (like + * adding snapshots, partition specs, etc.) * * This is because the create table requests need to be completed before * we can add snapshots to the tables. And, REST API does not support @@ -250,7 +260,7 @@ PostAllRestCatalogRequests(void) */ HASH_SEQ_STATUS status; - hash_seq_init(&status, RestCatalogRequestsHash); + hash_seq_init(&status, PgLakeXactRestCatalog->requestsHash); RestCatalogRequestPerTable *requestPerTable = NULL; while ((requestPerTable = hash_seq_search(&status)) != NULL) @@ -282,8 +292,9 @@ PostAllRestCatalogRequests(void) if (createTableRequest != NULL) { HttpResult httpResult = - SendRequestToRestCatalog(HTTP_POST, requestPerTable->tableRestUrl, - createTableRequest->body, PostHeadersWithAuth()); + SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_POST, + requestPerTable->tableRestUrl, createTableRequest->body, + PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 200) { @@ -298,8 +309,9 @@ PostAllRestCatalogRequests(void) else if (dropTableRequest != NULL) { HttpResult httpResult = - SendRequestToRestCatalog(HTTP_DELETE, requestPerTable->tableRestUrl, - NULL, DeleteHeadersWithAuth()); + SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_DELETE, + requestPerTable->tableRestUrl, NULL, + DeleteHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 204) { @@ -331,7 +343,7 @@ PostAllRestCatalogRequests(void) appendJsonKey(batchRequestBody, "table-changes"); appendStringInfo(batchRequestBody, "["); /* start array of changes */ - hash_seq_init(&status, RestCatalogRequestsHash); + hash_seq_init(&status, PgLakeXactRestCatalog->requestsHash); while ((requestPerTable = hash_seq_search(&status)) != NULL) { @@ -417,8 +429,11 @@ PostAllRestCatalogRequests(void) appendStringInfoChar(batchRequestBody, ']'); /* close table-changes */ appendStringInfoChar(batchRequestBody, '}'); /* close json body */ - char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, RestCatalogHost, catalogName); - HttpResult httpResult = SendRequestToRestCatalog(HTTP_POST, url, batchRequestBody->data, PostHeadersWithAuth()); + char *url = psprintf(REST_CATALOG_TRANSACTION_COMMIT, + PgLakeXactRestCatalog->catalogOpts->host, catalogName); + HttpResult httpResult = SendRequestToRestCatalog(PgLakeXactRestCatalog->catalogOpts, HTTP_POST, + url, batchRequestBody->data, + PostHeadersWithAuth(PgLakeXactRestCatalog->catalogOpts)); if (httpResult.status != 204) { @@ -427,7 +442,7 @@ PostAllRestCatalogRequests(void) } /* - * Switch back to old context from PgLakeXactCommitContext. + * Switch back to old context from commitContext. */ MemoryContextSwitchTo(oldContext); } @@ -541,61 +556,128 @@ InitTableMetadataTrackerHashIfNeeded(void) } /* - * InitTableMetadataTrackerHashIfNeeded is a helper function to manage the initialization - * of the hash. We allocate the hash and entries in TopTransactionContext. + * InitRestCatalogRequestsHashIfNeeded allocates the per-transaction + * PgLakeXactRestCatalog context on first use. Everything is placed in + * TopTransactionContext so it survives until XACT_EVENT_COMMIT and is + * cleaned up automatically at transaction end. */ static void InitRestCatalogRequestsHashIfNeeded(void) { - if (RestCatalogRequestsHash == NULL) - { - /* - * They always updated together. - */ - Assert(PgLakeXactCommitContext == NULL); + if (PgLakeXactRestCatalog != NULL) + return; - /* - * First allocate 1MB memory context to avoid palloc() in XACT_COMMIT - * as much as possible. Only with very large REST catalog requests we - * might need to palloc() in XACT_COMMIT, which is still better than - * always palloc()ing in XACT_COMMIT, reducing the risk of OOM - * significantly. These very large requests might happen when there - * are many tables modified in a single transaction, likely > 100 - * tables. We allocate in TopTransactionContext to preserve the - * context until the end of the transaction, and let it be cleaned up - * automatically at transaction end. - */ - PgLakeXactCommitContext = - AllocSetContextCreateInternal(TopTransactionContext, - "PgLakeXactCommitContext", - ONE_MB, ONE_MB, ONE_MB); - Assert(MemoryContextMemAllocated(PgLakeXactCommitContext, true) == ONE_MB); + MemoryContext oldctx = MemoryContextSwitchTo(TopTransactionContext); - HASHCTL ctl; + PgLakeXactRestCatalog = palloc0(sizeof(PgLakeXactRestCatalogContext)); - MemSet(&ctl, 0, sizeof(ctl)); - ctl.keysize = sizeof(Oid); - ctl.entrysize = sizeof(RestCatalogRequestPerTable); - ctl.hash = oid_hash; + /* + * Pre-allocate 1MB memory context to avoid palloc() in XACT_COMMIT as + * much as possible. Only with very large REST catalog requests we might + * need to palloc() in XACT_COMMIT, which is still better than always + * palloc()ing in XACT_COMMIT, reducing the risk of OOM significantly. + * These very large requests might happen when there are many tables + * modified in a single transaction, likely > 100 tables. + */ + PgLakeXactRestCatalog->commitContext = + AllocSetContextCreateInternal(TopTransactionContext, + "PgLakeXactCommitContext", + ONE_MB, ONE_MB, ONE_MB); + Assert(MemoryContextMemAllocated(PgLakeXactRestCatalog->commitContext, true) == ONE_MB); - /* - * We prefer to allocate everything in TopTransactionContext, not in - * PgLakeXactCommitContext, because we preserve - * PgLakeXactCommitContext mostly for REST API request bodies to avoid - * palloc() in XACT_COMMIT. - */ - ctl.hcxt = TopTransactionContext; + HASHCTL ctl; - RestCatalogRequestsHash = hash_create("Rest Catalog Requests", - 32, &ctl, - HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + MemSet(&ctl, 0, sizeof(ctl)); + ctl.keysize = sizeof(Oid); + ctl.entrysize = sizeof(RestCatalogRequestPerTable); + ctl.hash = oid_hash; + + /* + * We prefer to allocate the hash in TopTransactionContext, not in + * commitContext, because we reserve commitContext mostly for REST API + * request bodies to avoid palloc() in XACT_COMMIT. + */ + ctl.hcxt = TopTransactionContext; + + PgLakeXactRestCatalog->requestsHash = + hash_create("Rest Catalog Requests", + 32, &ctl, + HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); + + MemoryContextSwitchTo(oldctx); +} + + +/* + * BindRelationToXactRestCatalog binds the current transaction to the REST + * catalog associated with `relationId`, failing fast if a *different* REST + * catalog was already locked in for this transaction. + * + * Semantics: + * - For relations that are not REST-backed writable iceberg tables: no-op. + * - For the first REST-backed write of the transaction: pre-resolves the + * full catalog options and stashes them in TopTransactionContext, so + * subsequent calls within the same transaction can be checked without + * touching pg_foreign_server again at XACT_EVENT_COMMIT. + * - For any subsequent REST-backed write whose catalog differs from the + * locked-in one: raises ERRCODE_FEATURE_NOT_SUPPORTED *before* any + * Parquet data is written to S3. + * + * Called at the top of every DML entry point that can mutate REST-backed + * iceberg tables: postgresBeginForeignModify() for row-by-row DML, and + * AddQueryResultToTable() for the INSERT...SELECT and COPY..FROM pushdown + * paths. DDL paths (CREATE TABLE / DROP TABLE) reach the same protection + * indirectly via RecordRestCatalogRequestInTx(), which is invoked + * synchronously at statement time from the utility hook. + */ +void +BindRelationToXactRestCatalog(Oid relationId) +{ + if (!IsPgLakeIcebergForeignTableById(relationId) || + GetIcebergCatalogType(relationId) != REST_CATALOG_READ_WRITE) + return; + + /* + * Resolve the relation's catalog options up front. We need the full + * resolved struct (host, credentials, ...), not just the user-facing + * identifier, because XACT_EVENT_PRE_COMMIT reuses these fields when + * issuing the REST API requests and is not allowed to do syscache lookups + * by then. + */ + RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); + + InitRestCatalogRequestsHashIfNeeded(); + + if (PgLakeXactRestCatalog->catalogOpts == NULL) + { + PgLakeXactRestCatalog->catalogOpts = + CopyRestCatalogOptions(TopTransactionContext, resolvedOpts); + return; } + + /* + * Identity is the iceberg_catalog server OID, not the user-typed name. + * Two requests for the same physical server -- whether the user spelled + * the catalog as 'rest', 'REST', or as the underlying built-in server + * name on different statements -- collapse to the same OID and are + * treated as one catalog. The user-facing names are still reported in + * the error message. + */ + if (PgLakeXactRestCatalog->catalogOpts->serverOid != resolvedOpts->serverOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot modify tables from different REST catalogs " + "in the same transaction"), + errdetail("This transaction already targets catalog \"%s\", " + "but the current statement targets \"%s\".", + PgLakeXactRestCatalog->catalogOpts->catalog, + resolvedOpts->catalog))); } /* -* RecordRestCatalogRequestInTx records a REST catalog request to be sent at post-commit. -*/ + * RecordRestCatalogRequestInTx records a REST catalog request to be sent at post-commit. + */ void RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationType, const char *body) @@ -604,7 +686,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT bool isFound = false; RestCatalogRequestPerTable *requestPerTable = - hash_search(RestCatalogRequestsHash, + hash_search(PgLakeXactRestCatalog->requestsHash, &relationId, HASH_ENTER, &isFound); if (!isFound || !requestPerTable->isValid) @@ -612,6 +694,42 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT memset(requestPerTable, 0, sizeof(RestCatalogRequestPerTable)); requestPerTable->relationId = relationId; + /* Resolve the options for this relation's REST catalog */ + RestCatalogOptions *resolvedOpts = GetRestCatalogOptionsForRelation(relationId); + + /* + * DDL paths (CREATE TABLE / DROP TABLE) call us directly from the + * utility hook and may be the very first thing to touch a REST + * catalog in this transaction, so this branch is still genuinely + * reached. DML paths reach us only from XACT_EVENT_PRE_COMMIT, by + * which time BindRelationToXactRestCatalog() has already populated + * catalogOpts. + */ + if (PgLakeXactRestCatalog->catalogOpts == NULL) + { + PgLakeXactRestCatalog->catalogOpts = + CopyRestCatalogOptions(TopTransactionContext, resolvedOpts); + } + + /* + * Belt-and-suspenders check. All DML and DDL entry points either + * bind through BindRelationToXactRestCatalog() at statement time or + * populate catalogOpts via the branch above, so in practice we never + * reach here with a mismatched catalog. Kept as a last line of + * defense for any future code path that forgets to do so. See the + * companion comment in BindRelationToXactRestCatalog for why identity + * is the server OID, not the user-typed name. + */ + else if (PgLakeXactRestCatalog->catalogOpts->serverOid != resolvedOpts->serverOid) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot modify tables from different REST catalogs " + "in the same transaction"), + errdetail("This transaction already targets catalog server " + "\"%s\", but table %u belongs to \"%s\".", + PgLakeXactRestCatalog->catalogOpts->catalog, relationId, + resolvedOpts->catalog))); + requestPerTable->catalogName = MemoryContextStrdup(TopTransactionContext, GetRestCatalogName(relationId)); requestPerTable->catalogNamespace = @@ -628,7 +746,7 @@ RecordRestCatalogRequestInTx(Oid relationId, RestCatalogOperationType operationT requestPerTable->tableRestUrl = MemoryContextStrdup(TopTransactionContext, psprintf(REST_CATALOG_TABLE, - RestCatalogHost, + resolvedOpts->host, requestPerTable->urlEncodedCatalogName, requestPerTable->urlEncodedCatalogNamespace, requestPerTable->urlEncodedCatalogTableName)); diff --git a/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py new file mode 100644 index 00000000..3131d65f --- /dev/null +++ b/pg_lake_table/tests/pytests/test_iceberg_catalog_server.py @@ -0,0 +1,1093 @@ +import pytest +from utils_pytest import * + + +# ── FDW --------------------------─────────────────────────────────────────- + + +def test_iceberg_catalog_fdw_exists(pg_conn, extension): + """The iceberg_catalog FDW should be created by the extension.""" + result = run_query( + "SELECT fdwname FROM pg_foreign_data_wrapper WHERE fdwname = 'iceberg_catalog'", + pg_conn, + ) + assert len(result) == 1 + assert result[0]["fdwname"] == "iceberg_catalog" + + +def test_iceberg_catalog_fdw_has_no_handler(pg_conn, extension): + """iceberg_catalog is configuration-only, so it should have no handler.""" + result = run_query( + "SELECT fdwhandler FROM pg_foreign_data_wrapper WHERE fdwname = 'iceberg_catalog'", + pg_conn, + ) + assert result[0]["fdwhandler"] == 0 + + +# ── CREATE SERVER with valid options ─────────────────────────────────────── + + +def test_create_rest_server_with_all_options(superuser_conn, extension): + """All documented options should be accepted for a REST-type server. + Uses a mix of quoted upper-case and plain lower-case option names to + verify case-insensitive matching.""" + run_command( + """ + CREATE SERVER test_rest_all_opts TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + "Rest_Endpoint" 'http://localhost:8181', + "REST_AUTH_TYPE" 'OAuth2', + "OAuth_Endpoint" 'http://localhost:8181/oauth/tokens', + scope 'PRINCIPAL_ROLE:ALL', + enable_vended_credentials 'true', + "Location_Prefix" 's3://bucket/prefix', + catalog_name 'my_catalog', + "Client_Id" 'test-id', + "Client_Secret" 'test-secret' + ) + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_create_rest_server_no_options(superuser_conn, extension): + """A server with no options is valid; all settings fall back to GUCs.""" + run_command( + """ + CREATE SERVER test_rest_no_opts TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_lake_write_user_can_create_server(pg_conn, extension): + """A non-superuser with lake_write should be able to create a server.""" + run_command( + """ + CREATE SERVER test_lake_write_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + pg_conn, + ) + pg_conn.rollback() + + +def test_create_rest_server_minimal(superuser_conn, extension): + """A server with just rest_endpoint should be accepted.""" + run_command( + """ + CREATE SERVER test_rest_minimal TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_create_server_horizon_auth(superuser_conn, extension): + """Horizon auth type should be accepted.""" + run_command( + """ + CREATE SERVER test_horizon TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'https://horizon.example.com', + rest_auth_type 'horizon', + client_secret 'secret' + ) + """, + superuser_conn, + ) + superuser_conn.rollback() + + +# ── CREATE SERVER with invalid options ───────────────────────────────────── + + +def test_reject_unknown_server_option(superuser_conn, extension): + """ + Unknown options should be rejected by the validator. + + Issued twice on the same connection because the validator caches the + hint string in a static; the second call must hit the cached path and + must still produce a well-formed hint (regression guard against the + hint being palloc'd in a per-statement memory context). + """ + EXPECTED_OPTIONS = [ + "rest_endpoint", + "rest_auth_type", + "oauth_endpoint", + "scope", + "enable_vended_credentials", + "location_prefix", + "catalog_name", + "client_id", + "client_secret", + ] + + for typo in ("bogus_option", "another_typo"): + err = run_command( + f""" + CREATE SERVER test_bad_opt_{typo} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', {typo} 'x') + """, + superuser_conn, + raise_error=False, + ) + msg = str(err) + assert "invalid option" in msg + assert typo in msg + assert "Valid options are:" in msg + for opt in EXPECTED_OPTIONS: + assert opt in msg, f"hint missing {opt!r} on attempt {typo!r}: {msg}" + superuser_conn.rollback() + + +def test_reject_invalid_auth_type(superuser_conn, extension): + """Only 'oauth2', 'default', and 'horizon' are valid for rest_auth_type.""" + err = run_command( + """ + CREATE SERVER test_bad_auth TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', rest_auth_type 'basic') + """, + superuser_conn, + raise_error=False, + ) + assert "invalid rest_auth_type" in str(err) + superuser_conn.rollback() + + +def test_reject_invalid_vended_creds(superuser_conn, extension): + """enable_vended_credentials must be a valid boolean.""" + err = run_command( + """ + CREATE SERVER test_bad_bool TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181', enable_vended_credentials 'maybe') + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + superuser_conn.rollback() + + +def test_reject_options_on_non_server(superuser_conn, extension): + """Options on the FDW itself should be rejected.""" + err = run_command( + """ + ALTER FOREIGN DATA WRAPPER iceberg_catalog OPTIONS (ADD rest_endpoint 'http://x') + """, + superuser_conn, + raise_error=False, + ) + assert "only valid for SERVER" in str(err) + superuser_conn.rollback() + + +# ── Option value validation ───────────────────────────────────────────────── + + +@pytest.mark.parametrize( + "option, bad_value, expected_error", + [ + ("rest_endpoint", "", "must not be empty"), + ("rest_endpoint", "localhost:8181", "URI scheme"), + ("oauth_endpoint", "", "must not be empty"), + ("oauth_endpoint", "localhost/oauth/tokens", "URI scheme"), + ("location_prefix", "", "must not be empty"), + ("location_prefix", "my-bucket/prefix", "URI scheme"), + ("catalog_name", "", "must not be empty"), + ("client_id", "", "must not be empty"), + ("client_secret", "", "must not be empty"), + ("scope", "", "must not be empty"), + ], +) +def test_reject_bad_option_values( + superuser_conn, extension, option, bad_value, expected_error +): + err = run_command( + f""" + CREATE SERVER test_bad_val TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ({option} '{bad_value}') + """, + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for {option}='{bad_value}'" + assert expected_error in str(err), f"Expected '{expected_error}' in: {err}" + superuser_conn.rollback() + + +# ── CREATE FOREIGN TABLE on iceberg_catalog servers is blocked ────────────── + + +def test_reject_create_foreign_table_on_iceberg_catalog_server( + superuser_conn, extension +): + """CREATE FOREIGN TABLE on an iceberg_catalog server is blocked.""" + run_command( + """ + CREATE SERVER test_ft_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + err = run_command( + """ + CREATE FOREIGN TABLE test_ft_tbl (id int) + SERVER test_ft_srv + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot create foreign tables on iceberg_catalog server" in str(err) + superuser_conn.rollback() + + +# ── ALTER SERVER ─────────────────────────────────────────────────────────── + + +def test_alter_server_add_option(superuser_conn, extension): + """ALTER SERVER should allow adding new options.""" + run_command( + """ + CREATE SERVER test_alter TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + """ + ALTER SERVER test_alter OPTIONS (ADD scope 'PRINCIPAL_ROLE:ADMIN') + """, + superuser_conn, + ) + + result = run_query( + """ + SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_alter' + """, + superuser_conn, + ) + opts = result[0]["srvoptions"] + assert "scope=PRINCIPAL_ROLE:ADMIN" in opts + superuser_conn.rollback() + + +def test_alter_server_set_option(superuser_conn, extension): + """ALTER SERVER should allow changing existing options.""" + run_command( + """ + CREATE SERVER test_alter_set TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + """ + ALTER SERVER test_alter_set OPTIONS (SET rest_endpoint 'http://new-host:8181') + """, + superuser_conn, + ) + + result = run_query( + """ + SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_alter_set' + """, + superuser_conn, + ) + opts = result[0]["srvoptions"] + assert "rest_endpoint=http://new-host:8181" in opts + superuser_conn.rollback() + + +def test_alter_server_reject_unknown_option(superuser_conn, extension): + """ALTER SERVER should reject unknown options.""" + run_command( + """ + CREATE SERVER test_alter_bad TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + err = run_command( + """ + ALTER SERVER test_alter_bad OPTIONS (ADD unknown_opt 'x') + """, + superuser_conn, + raise_error=False, + ) + assert "invalid option" in str(err) + superuser_conn.rollback() + + +# ── DROP SERVER ──────────────────────────────────────────────────────────── + + +def test_drop_server(superuser_conn, extension): + """DROP SERVER should work for iceberg_catalog servers.""" + run_command( + """ + CREATE SERVER test_drop_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command("DROP SERVER test_drop_srv", superuser_conn) + + result = run_query( + "SELECT count(*) FROM pg_foreign_server WHERE srvname = 'test_drop_srv'", + superuser_conn, + ) + assert result[0]["count"] == 0 + superuser_conn.rollback() + + +# ── Using a server-based catalog in CREATE TABLE ─────────────────────────── + + +def test_create_table_with_server_catalog( + pg_conn, superuser_conn, s3, extension, with_default_location +): + """CREATE TABLE ... USING iceberg WITH (catalog = '') should + recognize the catalog option as a server-based REST catalog.""" + run_command( + """ + CREATE SERVER test_srv_catalog TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ( + rest_endpoint 'http://localhost:8181', + client_id 'id', + client_secret 'secret' + ) + """, + superuser_conn, + ) + run_command( + "GRANT USAGE ON FOREIGN SERVER test_srv_catalog TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + """ + CREATE TABLE test_srv_tbl () + USING iceberg + WITH (catalog = 'test_srv_catalog', read_only = 'true', + catalog_namespace = 'ns', catalog_table_name = 'tbl') + """, + pg_conn, + raise_error=False, + ) + # The REST endpoint is fake, so we expect a connection error, NOT a + # "invalid catalog option" error. This proves the server was resolved. + assert err is not None + assert "invalid catalog option" not in str(err) + assert "permission denied" not in str(err) + pg_conn.rollback() + + run_command("DROP SERVER test_srv_catalog CASCADE", superuser_conn) + superuser_conn.commit() + + +def test_create_table_requires_usage_on_catalog_server( + pg_conn, superuser_conn, s3, extension, with_default_location +): + """A non-superuser without USAGE on the catalog server must be + denied when creating a table that references it.""" + run_command( + """ + CREATE SERVER test_no_usage_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + """ + CREATE TABLE test_no_usage_tbl (id bigint) + USING iceberg + WITH (catalog = 'test_no_usage_srv') + """, + pg_conn, + raise_error=False, + ) + assert err is not None + assert "permission denied for foreign server" in str(err).lower() + pg_conn.rollback() + + run_command("DROP SERVER test_no_usage_srv", superuser_conn) + superuser_conn.commit() + + +def test_invalid_catalog_name_errors(pg_conn, s3, extension, with_default_location): + """A catalog name that is neither a known literal nor a valid server should error.""" + err = run_command( + """ + CREATE TABLE test_bad_cat () + USING iceberg + WITH (catalog = 'nonexistent_server', read_only = 'true') + """, + pg_conn, + raise_error=False, + ) + assert "invalid catalog option" in str(err) + pg_conn.rollback() + + +def test_non_iceberg_catalog_server_rejected( + pg_conn, superuser_conn, s3, extension, with_default_location +): + """A foreign server not under iceberg_catalog FDW should not be accepted + as a catalog value.""" + err = run_command( + """ + CREATE TABLE test_wrong_fdw () + USING iceberg + WITH (catalog = 'pg_lake_iceberg', read_only = 'true') + """, + pg_conn, + raise_error=False, + ) + assert "invalid catalog option" in str(err) + pg_conn.rollback() + + +# ── Backward compatibility ───────────────────────────────────────────────── + + +def test_catalog_rest_literal_still_works( + pg_conn, s3, extension, with_default_location +): + """catalog='rest' (literal) should still work via GUC fallback.""" + err = run_command( + """ + CREATE TABLE test_rest_literal () + USING iceberg + WITH (catalog = 'rest', read_only = 'true', + catalog_namespace = 'ns', catalog_table_name = 'tbl') + """, + pg_conn, + raise_error=False, + ) + # Will fail because REST GUCs aren't configured, but should NOT fail + # with "invalid catalog option" + if err is not None: + assert "invalid catalog option" not in str(err) + pg_conn.rollback() + + +def test_catalog_postgres_literal_still_works( + pg_conn, s3, extension, with_default_location +): + """catalog='postgres' (literal) should still work.""" + run_command( + """ + CREATE TABLE test_pg_literal (id int) + USING iceberg + WITH (catalog = 'postgres') + """, + pg_conn, + ) + pg_conn.rollback() + + +def test_catalog_object_store_literal_still_works( + pg_conn, + superuser_conn, + s3, + extension, + with_default_location, + adjust_object_store_settings, +): + """catalog='object_store' (literal) should still work.""" + run_command( + """ + CREATE TABLE test_os_literal (id int) + USING iceberg + WITH (catalog = 'object_store') + """, + pg_conn, + ) + pg_conn.rollback() + + +# ── Protection of reserved catalog names ─────────────────────────────────── + + +def test_reject_create_server_type_postgres(superuser_conn, extension): + """Users cannot create a new server with TYPE 'postgres'.""" + err = run_command( + """ + CREATE SERVER my_postgres TYPE 'postgres' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot create iceberg_catalog server with TYPE 'postgres'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_type_object_store(superuser_conn, extension): + """Users cannot create a new server with TYPE 'object_store'.""" + err = run_command( + """ + CREATE SERVER my_obj_store TYPE 'object_store' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot create iceberg_catalog server with TYPE 'object_store'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_non_rest_type(superuser_conn, extension): + """Any TYPE other than 'rest' is rejected for user-created iceberg_catalog servers.""" + err = run_command( + """ + CREATE SERVER my_server TYPE 'something_else' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "iceberg_catalog server requires TYPE 'rest'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_without_type(superuser_conn, extension): + """CREATE SERVER without TYPE is rejected for iceberg_catalog servers.""" + err = run_command( + """ + CREATE SERVER my_server + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "iceberg_catalog server requires TYPE 'rest'" in str(err) + superuser_conn.rollback() + + +def test_reject_create_server_reserved_name(superuser_conn, extension): + """CREATE SERVER with a reserved catalog name (case-insensitive) is blocked.""" + reserved_names = [ + "Postgres", + "OBJECT_STORE", + "ReSt", + ] + for name in reserved_names: + err = run_command( + f""" + CREATE SERVER "{name}" TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + """, + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for reserved name '{name}'" + assert "is reserved" in str(err) + superuser_conn.rollback() + + +def test_reject_rename_to_reserved_name(superuser_conn, extension): + """Renaming a user-created server TO a reserved name is blocked.""" + run_command( + """ + CREATE SERVER tmp_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + for reserved in ["POSTGRES", "Object_Store", "REST"]: + err = run_command( + f'ALTER SERVER tmp_rename_srv RENAME TO "{reserved}"', + superuser_conn, + raise_error=False, + ) + assert err is not None, f"Expected error for renaming to '{reserved}'" + assert "reserved for the extension-owned catalog" in str(err) + superuser_conn.rollback() + run_command( + """ + CREATE SERVER tmp_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.rollback() + + +def test_allow_drop_user_created_server(superuser_conn, extension): + """DROP SERVER on a user-created server should work fine.""" + run_command( + """ + CREATE SERVER user_rest_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + run_command("DROP SERVER user_rest_srv", superuser_conn) + superuser_conn.rollback() + + +def test_reject_rename_iceberg_catalog_server(superuser_conn, extension): + """Renaming an iceberg_catalog server is blocked because dependent tables + store the server name as a string option in ftoptions.""" + run_command( + """ + CREATE SERVER user_rename_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + err = run_command( + "ALTER SERVER user_rename_srv RENAME TO user_renamed_srv", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot rename iceberg_catalog server" in str(err) + superuser_conn.rollback() + + +def test_allow_owner_change_user_created_server(superuser_conn, extension): + """ALTER SERVER ... OWNER TO on a user-created server should work fine.""" + run_command( + """ + CREATE SERVER user_owner_srv TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + run_command( + "ALTER SERVER user_owner_srv OWNER TO CURRENT_USER", + superuser_conn, + ) + superuser_conn.rollback() + + +# ── default_catalog GUC with user-created servers ────────────────────────── + + +def test_set_default_catalog_to_user_created_rest_server(superuser_conn, extension): + """SET pg_lake_iceberg.default_catalog should accept a user-created + iceberg_catalog server with TYPE 'rest'.""" + run_command( + """ + CREATE SERVER my_rest_catalog TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + + run_command( + "SET pg_lake_iceberg.default_catalog = 'my_rest_catalog'", + superuser_conn, + ) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0]["pg_lake_iceberg.default_catalog"] == "my_rest_catalog" + superuser_conn.rollback() + + +def test_set_default_catalog_rejects_nonexistent_server(pg_conn, extension): + """SET pg_lake_iceberg.default_catalog should reject a name that is + neither a built-in literal nor an existing server.""" + err = run_command( + "SET pg_lake_iceberg.default_catalog = 'no_such_server'", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "user-created iceberg_catalog server" in str(err) + pg_conn.rollback() + + +def test_alter_system_default_catalog_defers_validation( + superuser_conn, pg_conn, extension +): + """The GUC check hook for pg_lake_iceberg.default_catalog cannot do catalog + lookups during SIGHUP reload (!IsTransactionState()), so it accepts the + value on faith — mirroring PostgreSQL's check_default_tablespace. + + Sequence: create a server, ALTER SYSTEM SET to it (passes in-transaction + validation), drop the server, then pg_reload_conf() re-applies the + now-stale name. The check hook must let it through. A subsequent + CREATE TABLE must fail at runtime.""" + run_command( + """ + CREATE SERVER alter_sys_cat TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + superuser_conn.commit() + + superuser_conn.autocommit = True + run_command( + "ALTER SYSTEM SET pg_lake_iceberg.default_catalog = 'alter_sys_cat'", + superuser_conn, + ) + run_command("SELECT pg_reload_conf()", superuser_conn) + run_command("SELECT pg_sleep(0.2)", superuser_conn) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0][0] == "alter_sys_cat" + + run_command("DROP SERVER alter_sys_cat", superuser_conn) + run_command("SELECT pg_reload_conf()", superuser_conn) + run_command("SELECT pg_sleep(0.2)", superuser_conn) + + result = run_query( + "SHOW pg_lake_iceberg.default_catalog", + superuser_conn, + ) + assert result[0][0] == "alter_sys_cat" + superuser_conn.autocommit = False + + err = run_command( + "CREATE TABLE alter_sys_test (id bigint) USING iceberg", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "invalid catalog option" in str(err).lower() + pg_conn.rollback() + + superuser_conn.autocommit = True + run_command( + "ALTER SYSTEM RESET pg_lake_iceberg.default_catalog", + superuser_conn, + ) + run_command("SELECT pg_reload_conf()", superuser_conn) + superuser_conn.autocommit = False + + +# ── Case-sensitive server names ──────────────────────────────────────────── + + +def test_case_sensitive_server_names(superuser_conn, extension): + """Server names are case-sensitive: "test_cs" and "TEST_CS" are distinct + servers that can coexist with different options.""" + run_command( + """ + CREATE SERVER test_cs TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://host-lower:8181') + """, + superuser_conn, + ) + run_command( + """ + CREATE SERVER "TEST_CS" TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://host-upper:8181') + """, + superuser_conn, + ) + + lower_opts = run_query( + "SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'test_cs'", + superuser_conn, + ) + upper_opts = run_query( + "SELECT srvoptions FROM pg_foreign_server WHERE srvname = 'TEST_CS'", + superuser_conn, + ) + + assert len(lower_opts) == 1 + assert len(upper_opts) == 1 + assert "host-lower" in str(lower_opts[0]["srvoptions"]) + assert "host-upper" in str(upper_opts[0]["srvoptions"]) + + run_command("DROP SERVER test_cs", superuser_conn) + run_command('DROP SERVER "TEST_CS"', superuser_conn) + superuser_conn.rollback() + + +# ── Built-in catalog servers ─────────────────────────────────────────────── + + +BUILTIN_CATALOG_SERVERS = [ + ("pg_lake_postgres_catalog", "postgres"), + ("pg_lake_object_store_catalog", "object_store"), + ("pg_lake_rest_catalog", "rest"), +] + + +def test_builtin_catalog_servers_exist(pg_conn, extension): + """The three built-in iceberg_catalog servers are pre-created by the + extension and survive as anchors for pg_depend edges.""" + result = run_query( + """ + SELECT srvname, srvtype + FROM pg_foreign_server s + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE fdw.fdwname = 'iceberg_catalog' + AND srvname LIKE 'pg_lake_%_catalog' + ORDER BY srvname + """, + pg_conn, + ) + names_types = [(row["srvname"], row["srvtype"]) for row in result] + assert names_types == sorted(BUILTIN_CATALOG_SERVERS) + + +def test_builtin_servers_are_extension_owned(pg_conn, extension): + """Built-in servers are members of the pg_lake_iceberg extension so + they get included in pg_dump's CREATE EXTENSION shadow rather than + emitted as standalone CREATE SERVER statements.""" + result = run_query( + """ + SELECT srvname + FROM pg_foreign_server s + JOIN pg_depend d ON d.objid = s.oid + JOIN pg_extension e ON e.oid = d.refobjid + WHERE e.extname = 'pg_lake_iceberg' + AND d.deptype = 'e' + AND d.classid = 'pg_foreign_server'::regclass + ORDER BY srvname + """, + pg_conn, + ) + assert [r["srvname"] for r in result] == [ + name for name, _ in sorted(BUILTIN_CATALOG_SERVERS) + ] + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_create_server_with_builtin_long_name( + long_name, _short_name, superuser_conn, extension +): + """CREATE SERVER with the internal long name is blocked; the long + names are implementation details and must never be addressable as + a user-typed server name.""" + err = run_command( + f""" + CREATE SERVER {long_name} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_rename_to_builtin_long_name( + long_name, _short_name, superuser_conn, extension +): + """Renaming a user-created server TO an internal long name is blocked.""" + run_command( + """ + CREATE SERVER rn_long_src TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint 'http://localhost:8181') + """, + superuser_conn, + ) + err = run_command( + f"ALTER SERVER rn_long_src RENAME TO {long_name}", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_alter_options_on_builtin_server_blocked( + long_name, _short_name, superuser_conn, extension +): + """ALTER SERVER OPTIONS on a built-in server is unconditionally blocked + — built-ins are immutable structural anchors and all configuration + lives in GUCs.""" + err = run_command( + f"ALTER SERVER {long_name} OPTIONS (ADD rest_endpoint 'http://x:8181')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot alter the built-in pg_lake_iceberg catalog server" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_alter_owner_on_builtin_server_blocked( + long_name, _short_name, superuser_conn, extension +): + """ALTER SERVER ... OWNER TO on a built-in server is blocked.""" + err = run_command( + f"ALTER SERVER {long_name} OWNER TO lake_write", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot change owner of the built-in" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_drop_builtin_server_blocked(long_name, _short_name, superuser_conn, extension): + """DROP SERVER on a built-in server is blocked by PostgreSQL itself, + because the server is extension-owned (pg_depend deptype = 'e').""" + err = run_command(f"DROP SERVER {long_name}", superuser_conn, raise_error=False) + assert err is not None + assert "extension pg_lake_iceberg" in str(err) or "depends on" in str(err) + superuser_conn.rollback() + + +def test_reject_rename_builtin_server(superuser_conn, extension): + """Renaming the built-in servers is blocked by the iceberg_catalog + rename guard (which already blocks renaming any iceberg_catalog + server, built-in or user-created).""" + err = run_command( + "ALTER SERVER pg_lake_rest_catalog RENAME TO renamed_builtin", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot rename iceberg_catalog server" in str(err) + superuser_conn.rollback() + + +@pytest.mark.parametrize("long_name,_short_name", BUILTIN_CATALOG_SERVERS) +def test_reject_catalog_option_with_builtin_long_name( + long_name, _short_name, pg_conn, extension +): + """The catalog= option on CREATE TABLE must use the short names + ('rest', 'postgres', 'object_store'); the internal long names are + rejected with a clear error pointing to the short forms.""" + err = run_command( + f"CREATE TABLE long_name_tbl (id int) USING iceberg WITH (catalog='{long_name}')", + pg_conn, + raise_error=False, + ) + assert err is not None + assert "reserved for an internal" in str(err) + pg_conn.rollback() + + +def test_postgres_fdw_named_postgres_does_not_conflict(superuser_conn, extension): + """A pre-existing CREATE SERVER named 'postgres' (e.g. from postgres_fdw) + does not collide with pg_lake's built-in postgres catalog. The user-facing + short name 'postgres' maps internally to 'pg_lake_postgres_catalog', so the + two coexist without interaction.""" + err = run_command( + "CREATE EXTENSION IF NOT EXISTS postgres_fdw", superuser_conn, raise_error=False + ) + if err is not None: + pytest.skip(f"postgres_fdw not available: {err}") + + run_command( + """ + CREATE SERVER postgres FOREIGN DATA WRAPPER postgres_fdw + OPTIONS (host 'localhost', port '5432', dbname 'postgres') + """, + superuser_conn, + ) + superuser_conn.commit() + + try: + coexist = run_query( + """ + SELECT srvname, fdw.fdwname + FROM pg_foreign_server s + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE srvname IN ('postgres', 'pg_lake_postgres_catalog') + ORDER BY srvname + """, + superuser_conn, + ) + rows = [(r["srvname"], r["fdwname"]) for r in coexist] + assert ("postgres", "postgres_fdw") in rows + assert ("pg_lake_postgres_catalog", "iceberg_catalog") in rows + finally: + run_command("DROP SERVER postgres", superuser_conn) + superuser_conn.commit() + + +def test_pg_depend_edge_for_builtin_postgres_catalog_table( + pg_conn, s3, extension, with_default_location +): + """A table created with catalog='postgres' (the short reserved name) + records a pg_depend edge against the built-in pg_lake_postgres_catalog + server. This is what makes DROP EXTENSION CASCADE clean up such + tables, and is the structural reason for pre-creating the built-in + servers in the first place.""" + run_command( + """ + CREATE TABLE pg_depend_tbl (id int) + USING iceberg + WITH (catalog = 'postgres') + """, + pg_conn, + ) + pg_conn.commit() + + try: + result = run_query( + """ + SELECT s.srvname + FROM pg_class c + JOIN pg_depend d ON d.objid = c.oid + AND d.classid = 'pg_class'::regclass + AND d.refclassid = 'pg_foreign_server'::regclass + JOIN pg_foreign_server s ON s.oid = d.refobjid + JOIN pg_foreign_data_wrapper fdw ON s.srvfdw = fdw.oid + WHERE c.relname = 'pg_depend_tbl' + AND fdw.fdwname = 'iceberg_catalog' + """, + pg_conn, + ) + assert len(result) == 1 + assert result[0]["srvname"] == "pg_lake_postgres_catalog" + finally: + run_command("DROP TABLE pg_depend_tbl", pg_conn) + pg_conn.commit() diff --git a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py index 5789b2ea..cd1dd88c 100644 --- a/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py +++ b/pg_lake_table/tests/pytests/test_modify_iceberg_rest_table.py @@ -5,14 +5,21 @@ from helpers.spark import * import json import re +import threading +import time from test_writable_iceberg_common import * @pytest.mark.parametrize( - "manifest_min_count_to_merge, target_manifest_size_kb, max_snapshot_age_params ", + "manifest_min_count_to_merge, target_manifest_size_kb, max_snapshot_age_params", manifest_snapshot_settings, ) +@pytest.mark.parametrize( + "create_iceberg_rest_table_parametrized", + ["rest", "user_server"], + indirect=True, +) def test_writable_rest_iceberg_table( installcheck, install_iceberg_to_duckdb, @@ -26,7 +33,7 @@ def test_writable_rest_iceberg_table( target_manifest_size_kb, max_snapshot_age_params, allow_iceberg_guc_perms, - create_iceberg_rest_table, + create_iceberg_rest_table_parametrized, create_test_helper_functions, create_http_helper_functions, ): @@ -48,7 +55,7 @@ def test_writable_rest_iceberg_table( ) superuser_conn.commit() - TABLE_NAME = create_iceberg_rest_table + TABLE_NAME = create_iceberg_rest_table_parametrized # show that we can read empty tables query = f"SELECT count(*) FROM {TABLE_NAMESPACE}.{TABLE_NAME}" @@ -565,3 +572,1172 @@ def get_rest_table_metadata_location(encoded_namespace, encoded_table_name, pg_c status, json_str, headers = res[0] metadata = json.loads(json_str) return metadata["metadata"]["location"] + + +server_option_override_params = [ + pytest.param( + "rest_endpoint", + "pg_lake_iceberg.rest_catalog_host", + "http://localhost:1", + id="rest_endpoint", + ), + pytest.param( + "client_id", + "pg_lake_iceberg.rest_catalog_client_id", + "wrong_id", + id="client_id", + ), + pytest.param( + "client_secret", + "pg_lake_iceberg.rest_catalog_client_secret", + "wrong_secret", + id="client_secret", + ), + pytest.param( + "location_prefix", + "pg_lake_iceberg.default_location_prefix", + "s3://nonexistent-broken-bucket-xyz", + id="location_prefix", + ), + pytest.param( + "catalog_name", + None, + None, + id="catalog_name", + ), +] + + +@pytest.mark.parametrize( + "option_name, guc_name, broken_guc_value", server_option_override_params +) +def test_server_option_overrides_guc( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + polaris_session, + create_http_helper_functions, + option_name, + guc_name, + broken_guc_value, +): + """ + Verify that each overridable server option takes precedence over + its corresponding GUC. For most options the GUC is set to a broken + value while the server option is set to the correct value, then we + prove the table works. For catalog_name the server option is set to + a wrong value and we prove it is used (instead of the default). + """ + if installcheck: + return + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + VALID_PREFIX = f"s3://{TEST_BUCKET}/" + + SERVER_NAME = f"rest_opt_override_{option_name}" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = f"opt_override_{option_name}" + + server_options = { + "rest_endpoint": endpoint, + "client_id": client_id, + "client_secret": client_secret, + "location_prefix": VALID_PREFIX, + } + + if option_name == "catalog_name": + server_options["catalog_name"] = "nonexistent_catalog" + + options_sql = ", ".join(f"{k} '{v}'" for k, v in server_options.items()) + + if guc_name is not None: + run_command( + f"SET {guc_name} TO '{broken_guc_value}'", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS ({options_sql}) + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + if option_name == "catalog_name": + err = run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} () " + f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true')", + pg_conn, + raise_error=False, + ) + assert err is not None, ( + "Expected failure because server's catalog_name 'nonexistent_catalog' " + "should be used instead of the default database name" + ) + pg_conn.rollback() + else: + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} " + f"SELECT i, i::text FROM generate_series(1, 10) i", + pg_conn, + ) + pg_conn.commit() + + results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", pg_conn) + assert results[0][0] == 10 + + if option_name == "location_prefix": + table_location = get_rest_table_metadata_location( + SCHEMA_NAME, TABLE_NAME, pg_conn + ) + stripped_prefix = VALID_PREFIX.rstrip("/") + assert table_location.startswith(stripped_prefix), ( + f"Expected location to start with server prefix " + f"'{stripped_prefix}', got '{table_location}'" + ) + assert broken_guc_value not in table_location + assert ( + "//" not in table_location.split("://", 1)[1] + ), f"Double slash found in location path: '{table_location}'" + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + if guc_name is not None: + run_command(f"RESET {guc_name}", superuser_conn) + superuser_conn.commit() + + +def test_reject_modify_different_rest_catalogs_in_single_transaction( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Modifying tables from two different REST catalog servers in the same + transaction must be rejected at statement time -- before any Parquet is + written to S3 for the offending statement. The first DML binds the + transaction to its REST catalog; the second DML to a different catalog + must raise immediately rather than waiting for XACT_EVENT_PRE_COMMIT. + """ + if installcheck: + return + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + for name in ["rest_catalog_a", "rest_catalog_b"]: + run_command( + f""" + CREATE SERVER {name} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {name} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {TABLE_NAMESPACE}", pg_conn) + pg_conn.commit() + + for name, catalog in [("table_a", "rest_catalog_a"), ("table_b", "rest_catalog_b")]: + run_command( + f"CREATE TABLE {TABLE_NAMESPACE}.{name} (id bigint) USING iceberg WITH (catalog='{catalog}')", + pg_conn, + ) + pg_conn.commit() + + # First INSERT succeeds and binds the transaction to rest_catalog_a. + run_command( + f"INSERT INTO {TABLE_NAMESPACE}.table_a SELECT i FROM generate_series(1, 10) i", + pg_conn, + ) + + # Second INSERT must raise at statement time, not at COMMIT. The + # "the current statement targets" detail wording is emitted only by + # BindRelationToXactRestCatalog(); the XACT_EVENT_PRE_COMMIT fallback + # uses "table %u belongs to" instead, so this match also pins down + # which code path fired. + with pytest.raises( + psycopg2.errors.FeatureNotSupported, + match=r"the current statement targets", + ): + run_command( + f"INSERT INTO {TABLE_NAMESPACE}.table_b SELECT i FROM generate_series(1, 10) i", + pg_conn, + ) + + pg_conn.rollback() + + for name, catalog in [("table_a", "rest_catalog_a"), ("table_b", "rest_catalog_b")]: + run_command( + f"DROP TABLE IF EXISTS {TABLE_NAMESPACE}.{name}", + pg_conn, + ) + pg_conn.commit() + + run_command(f"DROP SCHEMA IF EXISTS {TABLE_NAMESPACE}", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + for name in ["rest_catalog_a", "rest_catalog_b"]: + run_command(f"DROP SERVER IF EXISTS {name}", superuser_conn) + superuser_conn.commit() + + +def test_multi_table_single_transaction_on_same_server( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Two tables on the *same* user-created server: INSERT into one and + UPDATE the other in a single transaction must succeed. + """ + if installcheck: + return + + SERVER_NAME = "rest_multi_tbl" + SCHEMA_NAME = TABLE_NAMESPACE + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.multi_a (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + run_command( + f"CREATE TABLE {SCHEMA_NAME}.multi_b (id bigint, value text) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.multi_b SELECT i, 'old' FROM generate_series(1, 5) i", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.multi_a SELECT i, i::text FROM generate_series(1, 10) i", + pg_conn, + ) + run_command( + f"UPDATE {SCHEMA_NAME}.multi_b SET value = 'new' WHERE id <= 3", + pg_conn, + ) + pg_conn.commit() + + results_a = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.multi_a", pg_conn) + assert results_a[0][0] == 10 + + results_b = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.multi_b WHERE value = 'new'", pg_conn + ) + assert results_b[0][0] == 3 + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_token_cache_reuses_token_across_catalog_ops( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + set_polaris_gucs, + create_http_helper_functions, +): + """ + The per-catalog token cache must reuse a single OAuth token across + multiple back-to-back catalog operations in the same session. + A cache miss on every call would double request latency. + + Uses pg_lake_iceberg.http_client_trace_traffic to observe actual + HTTP traffic: each token fetch shows up as a POST to .../oauth/tokens + in the connection notices. Does 3 back-to-back inserts. + """ + if installcheck: + return + + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "token_cache_test" + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + "SET pg_lake_iceberg.http_client_trace_traffic TO on", + pg_conn, + ) + pg_conn.notices.clear() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint, value text) " + f"USING iceberg WITH (catalog='rest')", + pg_conn, + ) + pg_conn.commit() + + for i in range(3): + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES ({i}, 'v')", + pg_conn, + ) + pg_conn.commit() + + token_fetches = sum( + 1 for n in pg_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert token_fetches == 1, ( + f"Expected exactly 1 OAuth token fetch (cached), got {token_fetches}. " + f"Notices:\n" + "\n".join(pg_conn.notices) + ) + + run_command( + "RESET pg_lake_iceberg.http_client_trace_traffic", + pg_conn, + ) + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + +def test_alter_server_credentials_invalidates_token_cache( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + After ALTER SERVER, the cached OAuth token must be discarded so the + next catalog operation re-fetches it. We verify this by enabling + HTTP traffic tracing and checking that a POST to .../oauth/tokens + appears after the ALTER SERVER (proving the cache was invalidated). + Test that cache is invalidated on bogus credentials (1 fetch, commit fails), + then cache is invalidated again on restored credentials (1 fetch, commit succeeds). + """ + if installcheck: + return + + SERVER_NAME = "rest_token_inval" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "token_inval_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (1)", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + "SET pg_lake_iceberg.http_client_trace_traffic TO on", + superuser_conn, + ) + superuser_conn.notices.clear() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (2)", + superuser_conn, + ) + superuser_conn.commit() + + pre_alter_fetches = sum( + 1 for n in superuser_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert pre_alter_fetches == 0, ( + f"Expected no token fetch before ALTER SERVER (token cached), " + f"got {pre_alter_fetches}. Notices:\n" + "\n".join(superuser_conn.notices) + ) + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id 'rotated-id')", + superuser_conn, + ) + superuser_conn.commit() + + superuser_conn.notices.clear() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (3)", + superuser_conn, + ) + + commit_failed = False + try: + superuser_conn.commit() + except psycopg2.DatabaseError: + commit_failed = True + superuser_conn.rollback() + + post_alter_notices = list(superuser_conn.notices) + post_alter_fetches = sum( + 1 for n in post_alter_notices if "oauth/tokens" in n and "POST" in n + ) + + assert commit_failed, ( + "Expected COMMIT to fail after ALTER SERVER set bogus client_id " + "(cache should have been invalidated, forcing re-auth with bad creds)" + ) + assert post_alter_fetches == 1, ( + f"Expected exactly 1 token re-fetch after ALTER SERVER (cache invalidated), " + f"got {post_alter_fetches}. Notices ({len(post_alter_notices)}):\n" + + "\n".join(post_alter_notices) + ) + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id '{client_id}')", + superuser_conn, + ) + superuser_conn.commit() + + superuser_conn.notices.clear() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (4)", + superuser_conn, + ) + superuser_conn.commit() + + restore_fetches = sum( + 1 for n in superuser_conn.notices if "oauth/tokens" in n and "POST" in n + ) + assert restore_fetches == 1, ( + f"Expected exactly 1 token re-fetch after restoring credentials, " + f"got {restore_fetches}. Notices:\n" + "\n".join(superuser_conn.notices) + ) + + results = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", superuser_conn + ) + assert results[0][0] == 3 + + run_command( + "RESET pg_lake_iceberg.http_client_trace_traffic", + superuser_conn, + ) + + superuser_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_drop_server_with_dependent_iceberg_table( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Tables created with catalog='' record a pg_depend entry on + the server. DROP SERVER without CASCADE must be blocked, and + DROP SERVER CASCADE must drop the dependent table. + """ + if installcheck: + return + + SERVER_NAME = "rest_dep_test" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + err = run_command( + f"DROP SERVER {SERVER_NAME}", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot drop" in str(err).lower() + superuser_conn.rollback() + + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + result = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert result[0][0] == 0 + + +def test_drop_owned_by_with_dependent_iceberg_table( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + When a role owns an iceberg_catalog server that has dependent iceberg + tables, DROP OWNED BY must propagate the dependency: + - DROP OWNED BY RESTRICT -> blocked + - DROP OWNED BY CASCADE -> drops the dependent table too + """ + if installcheck: + return + + ROLE_NAME = "rest_owned_test_role" + SERVER_NAME = "rest_owned_test_srv" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "owned_dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command(f"DROP ROLE IF EXISTS {ROLE_NAME}", superuser_conn, raise_error=False) + superuser_conn.commit() + + run_command( + f"CREATE ROLE {ROLE_NAME} WITH CREATEDB LOGIN", + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO {ROLE_NAME}", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"SET ROLE {ROLE_NAME}", superuser_conn) + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + run_command("RESET ROLE", superuser_conn) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + ) + pg_conn.commit() + + err = run_command( + f"DROP OWNED BY {ROLE_NAME} RESTRICT", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "cannot drop" in str(err).lower() + superuser_conn.rollback() + + server_exists = run_query( + f"SELECT count(*) FROM pg_foreign_server WHERE srvname = '{SERVER_NAME}'", + superuser_conn, + ) + assert server_exists[0][0] == 1 + + table_exists = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert table_exists[0][0] == 1 + + run_command(f"DROP OWNED BY {ROLE_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + server_exists = run_query( + f"SELECT count(*) FROM pg_foreign_server WHERE srvname = '{SERVER_NAME}'", + superuser_conn, + ) + assert server_exists[0][0] == 0 + + table_exists = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + pg_conn, + ) + assert table_exists[0][0] == 0 + + run_command(f"DROP ROLE {ROLE_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_drop_server_blocks_on_active_query( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + DROP SERVER ... CASCADE on a server with a dependent iceberg table must + block while another transaction holds AccessShareLock on that table. + This verifies that the pg_depend edge registered by + RecordIcebergCatalogServerDependency is honored by core's dependency + walker, which acquires AccessExclusiveLock on each dependent table. + """ + if installcheck: + return + + SERVER_NAME = "rest_lock_test_srv" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "lock_dep_tbl" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + holder_conn = open_pg_conn("postgres") + dropper_conn = open_pg_conn("postgres") + try: + run_command("BEGIN", holder_conn) + run_command( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", + holder_conn, + ) + + drop_done = threading.Event() + drop_error = [] + + def run_drop(): + try: + run_command(f"DROP SERVER {SERVER_NAME} CASCADE", dropper_conn) + dropper_conn.commit() + except Exception as e: + drop_error.append(e) + finally: + drop_done.set() + + drop_thread = threading.Thread(target=run_drop) + drop_thread.start() + + deadline = time.time() + 10 + blocked = False + while time.time() < deadline: + rows = run_query( + """ + SELECT count(*) FROM pg_stat_activity + WHERE wait_event_type = 'Lock' + AND query LIKE 'DROP SERVER%' + """, + superuser_conn, + ) + superuser_conn.rollback() + if int(rows[0][0]) == 1: + blocked = True + break + time.sleep(0.1) + + assert blocked, "Expected DROP SERVER to be blocked on AccessShareLock" + assert not drop_done.is_set(), "DROP SERVER should not have completed yet" + + run_command("COMMIT", holder_conn) + + drop_thread.join(timeout=15) + assert drop_done.is_set(), "DROP SERVER did not complete after lock released" + assert not drop_error, f"DROP SERVER failed: {drop_error[0]}" + + result = run_query( + f"SELECT count(*) FROM pg_class WHERE relname = '{TABLE_NAME}'", + superuser_conn, + ) + assert result[0][0] == 0 + finally: + try: + holder_conn.rollback() + except Exception: + pass + holder_conn.close() + dropper_conn.close() + + +def test_reject_writable_table_on_server_with_catalog_name( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Creating a writable table on a server that has catalog_name set must + be rejected, because writable tables always derive the catalog name + from the database name. + """ + if installcheck: + return + + SERVER_NAME = "rest_catalog_has_catname" + SCHEMA_NAME = TABLE_NAMESPACE + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + catalog_name '{server_params.PG_DATABASE}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + err = run_command( + f"CREATE TABLE {SCHEMA_NAME}.reject_catname (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + pg_conn, + raise_error=False, + ) + assert err is not None + assert ( + "writable REST catalog tables cannot use a server with catalog_name set" + in str(err) + ) + pg_conn.rollback() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_alter_server_add_catalog_name_does_not_reroute_writable_table( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Writable tables always use the database name for REST catalog routing, + ignoring the server's catalog_name. Adding catalog_name to the server + after a writable table exists must NOT change where requests go. + """ + if installcheck: + return + + SERVER_NAME = "rest_catname_reroute" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "catname_reroute_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (1)", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (ADD catalog_name 'nonexistent_db')", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{TABLE_NAME} VALUES (2)", + superuser_conn, + ) + superuser_conn.commit() + + results = run_query( + f"SELECT count(*) FROM {SCHEMA_NAME}.{TABLE_NAME}", superuser_conn + ) + assert results[0][0] == 2 + + superuser_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_alter_server_rest_endpoint_blocked_with_dependent_tables( + installcheck, + superuser_conn, + s3, + extension, + with_default_location, + polaris_session, + create_http_helper_functions, +): + """ + Changing rest_endpoint on a server that has dependent iceberg tables + (writable or read-only) must be blocked, because both types make REST + API calls against the server's endpoint at runtime. + """ + if installcheck: + return + + SERVER_NAME = "rest_endpoint_block" + SCHEMA_NAME = TABLE_NAMESPACE + TABLE_NAME = "endpoint_block_test" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", superuser_conn) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{TABLE_NAME} (id bigint) " + f"USING iceberg WITH (catalog='{SERVER_NAME}')", + superuser_conn, + ) + superuser_conn.commit() + + err = run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET rest_endpoint 'http://other:8181')", + superuser_conn, + raise_error=False, + ) + assert err is not None + assert "dependent iceberg tables" in str(err) + superuser_conn.rollback() + + run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET client_id '{client_id}')", + superuser_conn, + ) + superuser_conn.commit() + + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", superuser_conn) + superuser_conn.commit() + + err = run_command( + f"ALTER SERVER {SERVER_NAME} OPTIONS (SET rest_endpoint 'http://other:8181')", + superuser_conn, + raise_error=False, + ) + assert ( + err is None + ), "ALTER SERVER rest_endpoint should succeed after dependent tables are dropped" + superuser_conn.rollback() + + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +def test_table_catalog_name_overrides_server( + installcheck, + superuser_conn, + pg_conn, + s3, + extension, + with_default_location, + polaris_session, + set_polaris_gucs, + create_http_helper_functions, +): + """ + The table-level catalog_name takes precedence over the server's. + We create a writable table via the built-in 'rest' catalog, then + create a read-only table via a server whose catalog_name is wrong, + overriding it on the table with the correct one. If the table + option did not take precedence, the metadata lookup would fail. + """ + if installcheck: + return + + SERVER_NAME = "rest_catname_bad" + SCHEMA_NAME = "test_catname_override" + SRC_TABLE = "catname_src" + RO_TABLE = "catname_ro" + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://localhost:{server_params.POLARIS_PORT}" + + run_command(f"CREATE SCHEMA IF NOT EXISTS {SCHEMA_NAME}", pg_conn) + pg_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{SRC_TABLE} (id bigint) " + f"USING iceberg WITH (catalog='rest')", + pg_conn, + ) + pg_conn.commit() + + run_command( + f"INSERT INTO {SCHEMA_NAME}.{SRC_TABLE} " + f"SELECT i FROM generate_series(1, 5) i", + pg_conn, + ) + pg_conn.commit() + + run_command( + f""" + CREATE SERVER {SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + catalog_name 'nonexistent_catalog') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + run_command( + f"CREATE TABLE {SCHEMA_NAME}.{RO_TABLE} () " + f"USING iceberg WITH (catalog='{SERVER_NAME}', read_only='true', " + f"catalog_name='{server_params.PG_DATABASE}', " + f"catalog_namespace='{SCHEMA_NAME}', " + f"catalog_table_name='{SRC_TABLE}')", + pg_conn, + ) + pg_conn.commit() + + results = run_query(f"SELECT count(*) FROM {SCHEMA_NAME}.{RO_TABLE}", pg_conn) + assert results[0][0] == 5 + + pg_conn.rollback() + run_command(f"DROP SCHEMA {SCHEMA_NAME} CASCADE", pg_conn) + pg_conn.commit() + + superuser_conn.rollback() + run_command(f"DROP SERVER {SERVER_NAME}", superuser_conn) + superuser_conn.commit() diff --git a/pg_lake_table/tests/pytests/test_writable_iceberg_common.py b/pg_lake_table/tests/pytests/test_writable_iceberg_common.py index 46662abd..ff23121b 100644 --- a/pg_lake_table/tests/pytests/test_writable_iceberg_common.py +++ b/pg_lake_table/tests/pytests/test_writable_iceberg_common.py @@ -4,6 +4,7 @@ import json import re import random +from pathlib import Path TABLE_NAMESPACE = "test_writable_iceberg" @@ -270,3 +271,90 @@ def create_iceberg_rest_table( pg_conn.rollback() run_command(f"DROP SCHEMA {TABLE_NAMESPACE} CASCADE", pg_conn) pg_conn.commit() + + +USER_SERVER_NAME = "crud_test_server" + + +@pytest.fixture +def create_iceberg_user_server_rest_table( + superuser_conn, + pg_conn, + with_default_location, + generate_table_name, + polaris_session, + installcheck, +): + """Same as create_iceberg_rest_table but routes through a user-created + iceberg_catalog server instead of the built-in 'rest' GUC path.""" + if installcheck: + yield + return + + creds = json.loads(Path(server_params.POLARIS_PRINCIPAL_CREDS_FILE).read_text()) + client_id = creds["credentials"]["clientId"] + client_secret = creds["credentials"]["clientSecret"] + endpoint = f"http://{server_params.POLARIS_HOSTNAME}:{server_params.POLARIS_PORT}" + + run_command( + f""" + CREATE SERVER {USER_SERVER_NAME} TYPE 'rest' + FOREIGN DATA WRAPPER iceberg_catalog + OPTIONS (rest_endpoint '{endpoint}', + client_id '{client_id}', + client_secret '{client_secret}', + location_prefix 's3://{TEST_BUCKET}') + """, + superuser_conn, + ) + run_command( + f"GRANT USAGE ON FOREIGN SERVER {USER_SERVER_NAME} TO PUBLIC", + superuser_conn, + ) + superuser_conn.commit() + + table_name = generate_table_name + + run_command(f"CREATE SCHEMA {TABLE_NAMESPACE}", pg_conn) + run_command( + f"CREATE TABLE {TABLE_NAMESPACE}.{table_name} " + f"(drop_col_1 INT, id_old bigint, drop_col_2 INT) " + f"USING iceberg WITH (catalog='{USER_SERVER_NAME}')", + pg_conn, + ) + + run_command( + f"ALTER TABLE {TABLE_NAMESPACE}.{table_name} " + f"DROP COLUMN drop_col_2, ADD COLUMN value text, DROP COLUMN drop_col_1", + pg_conn, + ) + run_command( + f"ALTER TABLE {TABLE_NAMESPACE}.{table_name} RENAME COLUMN id_old TO id", + pg_conn, + ) + run_command( + f"ALTER FOREIGN TABLE {TABLE_NAMESPACE}.{table_name} " + f"OPTIONS (ADD autovacuum_enabled 'false')", + pg_conn, + ) + + pg_conn.commit() + + yield table_name + + pg_conn.rollback() + run_command(f"DROP SCHEMA {TABLE_NAMESPACE} CASCADE", pg_conn) + pg_conn.commit() + run_command(f"DROP SERVER IF EXISTS {USER_SERVER_NAME}", superuser_conn) + superuser_conn.commit() + + +@pytest.fixture +def create_iceberg_rest_table_parametrized(request): + """Dispatches to either the built-in 'rest' or user-server fixture + based on the indirect parameter.""" + fixture_name = { + "rest": "create_iceberg_rest_table", + "user_server": "create_iceberg_user_server_rest_table", + }[request.param] + return request.getfixturevalue(fixture_name)