Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f016e2c
Add iceberg_catalog FDW and refactor REST catalog for multi-server su…
sfc-gh-npuka Mar 2, 2026
d80b574
Recognize server-based REST catalogs in catalog type detection
sfc-gh-npuka Mar 2, 2026
354c8e6
Support server-based catalogs in option validation and table creation
sfc-gh-npuka Mar 2, 2026
e7e5f58
Track per-table REST catalog connections in transaction handling
sfc-gh-npuka Mar 2, 2026
573cc93
Add tests for iceberg_catalog server configuration
sfc-gh-npuka Mar 2, 2026
4fd8ce5
Add ProtectExtensionCatalogServersHandler
sfc-gh-npuka Mar 12, 2026
f2b393d
Address review
sfc-gh-npuka Mar 12, 2026
3b5228a
Allow extension owned rest catalog to alter server options
sfc-gh-npuka Mar 12, 2026
b3c0ef9
Address review part 1
sfc-gh-npuka Mar 13, 2026
0171e02
Address review part 2
sfc-gh-npuka Mar 13, 2026
32d64c7
Block reserved catalog names and fix prefix-match comparisons
sfc-gh-npuka Mar 13, 2026
c2d7f1a
Address Onder's review
sfc-gh-npuka Mar 19, 2026
6edcfe0
Treat postgres, object_store, and rest as built-in names, not servers
sfc-gh-npuka Mar 24, 2026
911135b
Use catalog_name server option for REST catalog tables
sfc-gh-npuka Mar 24, 2026
e83fd0b
Cleanup from rebase and fix style
sfc-gh-npuka Apr 16, 2026
a75268d
Address Aykut's latest review
sfc-gh-npuka Apr 16, 2026
a8236b3
Move changes to pg_lake_iceberg--3.3--3.4.sql
sfc-gh-npuka Apr 20, 2026
ad06a85
Address Onder's amazing comprehensive review
sfc-gh-npuka Apr 21, 2026
6374ee0
Some cleanup from last review addressing since I left everything in C…
sfc-gh-npuka May 4, 2026
46daf15
Fix GetRestCatalogName dead code
sfc-gh-npuka May 4, 2026
ef097de
More cleanup from Onder's latest review
sfc-gh-npuka May 23, 2026
0e65cd7
Back built-in iceberg catalogs with foreign server objects
sfc-gh-npuka May 25, 2026
87dbbc1
Use iceberg_catalog server OID as canonical REST catalog identity
sfc-gh-npuka May 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions pg_lake_engine/include/pg_lake/util/catalog_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,34 @@

#pragma once

/* FDW name for iceberg_catalog servers */
#define ICEBERG_CATALOG_FDW_NAME "iceberg_catalog"

/*
* The allowed values for IcebergDefaultCatalog, case insensitive.
*
* These are the user-facing short names used as the catalog= option value
* on CREATE TABLE ... USING iceberg. Internally they map to the
* prefixed built-in server names below; users never type the prefixed
* names directly.
*/
#define POSTGRES_CATALOG_NAME "postgres"
#define OBJECT_STORE_CATALOG_NAME "object_store"
#define REST_CATALOG_NAME "rest"

/*
* Built-in iceberg_catalog server names. Pre-created by the extension
* upgrade script and exist purely as anchors for pg_depend edges and the
* uniform server-lookup path. All ALTER/DROP/RENAME on these names is
* blocked (configuration for the built-in catalogs lives in GUCs).
*
* The prefix keeps them clear of names users are likely to have already
* used (notably the very common "CREATE SERVER postgres FDW postgres_fdw").
*/
#define PG_LAKE_POSTGRES_CATALOG_SERVER_NAME "pg_lake_postgres_catalog"
#define PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME "pg_lake_object_store_catalog"
#define PG_LAKE_REST_CATALOG_SERVER_NAME "pg_lake_rest_catalog"

typedef enum IcebergCatalogType
{
NONE_CATALOG = 0,
Expand Down Expand Up @@ -61,3 +82,7 @@ extern PGDLLEXPORT IcebergCatalogType GetIcebergCatalogType(Oid relationId);
extern PGDLLEXPORT bool HasRestCatalogTableOption(List *options);
extern PGDLLEXPORT bool HasObjectStoreCatalogTableOption(List *options);
extern PGDLLEXPORT bool HasReadOnlyOption(List *options);
extern PGDLLEXPORT bool IsCatalogOwnedByExtension(const char *catalog);
extern PGDLLEXPORT bool IsRestCatalog(const char *catalog);
extern PGDLLEXPORT const char *ResolveCatalogServerName(const char *catalog);
extern PGDLLEXPORT bool IsBuiltinCatalogServerName(const char *serverName);
125 changes: 121 additions & 4 deletions pg_lake_engine/src/utils/catalog_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,16 @@ GetIcebergCatalogType(Oid relationId)


/*
* HasRestCatalogTableOption returns true if the options contain
* catalog='rest'.
* HasRestCatalogTableOption returns true if the catalog option indicates a
* REST catalog: either the literal value 'rest' or the name of an
* iceberg_catalog foreign server with TYPE 'rest'.
*/
bool
HasRestCatalogTableOption(List *options)
{
char *catalog = GetStringOption(options, "catalog", false);

return catalog ? pg_strncasecmp(catalog, REST_CATALOG_NAME, strlen(catalog)) == 0 : false;
return IsRestCatalog(catalog);
}


Expand All @@ -89,7 +90,7 @@ HasObjectStoreCatalogTableOption(List *options)
{
char *catalog = GetStringOption(options, "catalog", false);

return catalog ? pg_strncasecmp(catalog, OBJECT_STORE_CATALOG_NAME, strlen(catalog)) == 0 : false;
return catalog ? pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 : false;
}


Expand All @@ -104,3 +105,119 @@ HasReadOnlyOption(List *options)

return readOnly ? pg_strncasecmp(readOnly, "true", strlen("true")) == 0 : false;
}


/*
* IsCatalogOwnedByExtension returns true if the catalog name is one of
* the reserved built-in names: 'rest', 'object_store', or 'postgres'.
* Comparison is case-insensitive.
*/
bool
IsCatalogOwnedByExtension(const char *catalog)
{
return pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0 ||
pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0 ||
pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0;
}


/*
* IsRestCatalog returns true if the catalog name identifies a REST catalog.
* This includes the built-in 'rest' literal and any user-created
* iceberg_catalog server whose TYPE is 'rest'.
*
* The internal built-in server names (e.g. "pg_lake_rest_catalog") are
* deliberately rejected: they are implementation details and must not be
* usable as catalog= option values on CREATE TABLE. Users always type
* the short name "rest", which is mapped to the long server name only
* inside the resolution layer.
*/
bool
IsRestCatalog(const char *catalog)
{
if (catalog == NULL)
return false;

if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be strcmp? I think we should allow below. Shouldnt we do case sensitive comparison in general?

postgres=# CREATE SERVER "PostgreS" TYPE 'rest'                                                                            FOREIGN DATA WRAPPER iceberg_catalog                                                                                     OPTIONS (rest_endpoint 'http://polaris:8181',                                                                                     client_id '...', client_secret '...',                                                                                    location_prefix 's3://testbucket/aykutrest');
ERROR:  server name "PostgreS" is reserved for the extension-owned catalog
HINT:  Choose a different server name.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not allowing since options are case insensitive, i.e. if you do CREATE TABLE a USING iceberg WITH (catalog = 'Postgres') it's automatically gonna resolve to "postgres" catalog.

return true;

if (IsBuiltinCatalogServerName(catalog))
return false;

/* Try to look up a server with this name */
bool missingOK = true;
ForeignServer *server = GetForeignServerByName(catalog, missingOK);

if (server == NULL)
return false;

ForeignDataWrapper *fdw = GetForeignDataWrapper(server->fdwid);

if (strcmp(fdw->fdwname, ICEBERG_CATALOG_FDW_NAME) != 0)
return false;

/*
* Any iceberg_catalog server reaching this point is user-created, and
* ValidateIcebergCatalogServerDDL forces all user-created iceberg_catalog
* servers to TYPE 'rest'.
*/
Assert(pg_strcasecmp(server->servertype, REST_CATALOG_NAME) == 0);
return true;
}


/*
* ResolveCatalogServerName maps a user-facing catalog identifier to the
* actual pg_foreign_server.srvname.
*
* For the three reserved short names ('postgres', 'object_store', 'rest')
* the result is the corresponding pre-created built-in server name.
* Any other input is returned unchanged (user-created server names match
* their catalog= option value verbatim).
*
* The returned pointer is either a string literal or the input pointer;
* callers must not free it.
*/
const char *
ResolveCatalogServerName(const char *catalog)
{
if (catalog == NULL)
return NULL;

if (pg_strcasecmp(catalog, REST_CATALOG_NAME) == 0)
return PG_LAKE_REST_CATALOG_SERVER_NAME;
if (pg_strcasecmp(catalog, POSTGRES_CATALOG_NAME) == 0)
return PG_LAKE_POSTGRES_CATALOG_SERVER_NAME;
if (pg_strcasecmp(catalog, OBJECT_STORE_CATALOG_NAME) == 0)
return PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME;

return catalog;
}


/*
* IsBuiltinCatalogServerName returns true if the given name matches one
* of the three pre-created built-in iceberg_catalog servers.
*
* Comparison is case-insensitive: both PostgreSQL-parsed identifiers
* (already downcased by the parser unless quoted) and free-form string
* literals supplied as catalog= option values flow through this helper,
* and we want to reject typos like 'PG_LAKE_REST_CATALOG' just as
* firmly as the canonical form.
*
* This is the long-name counterpart to IsCatalogOwnedByExtension, which
* operates on the user-facing short names. Used by the DDL protection
* hook to lock down ALTER/RENAME/OWNER on the extension's structural
* anchors and by create_table.c to reject the long names as catalog=
* option values.
*/
bool
IsBuiltinCatalogServerName(const char *serverName)
{
if (serverName == NULL)
return false;

return pg_strcasecmp(serverName, PG_LAKE_REST_CATALOG_SERVER_NAME) == 0 ||
pg_strcasecmp(serverName, PG_LAKE_POSTGRES_CATALOG_SERVER_NAME) == 0 ||
pg_strcasecmp(serverName, PG_LAKE_OBJECT_STORE_CATALOG_SERVER_NAME) == 0;
}
54 changes: 47 additions & 7 deletions pg_lake_iceberg/include/pg_lake/rest_catalog/rest_catalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
#pragma once

#include "postgres.h"
#include "pg_lake/ddl/utility_hook.h"
#include "pg_lake/http/http_client.h"
#include "pg_lake/util/rel_utils.h"
#include "pg_lake/parquet/field.h"
#include "pg_lake/iceberg/api/snapshot.h"

#define REST_CATALOG_AUTH_TYPE_DEFAULT (0)
#define REST_CATALOG_AUTH_TYPE_OAUTH2 (0)
#define REST_CATALOG_AUTH_TYPE_HORIZON (1)

extern PGDLLEXPORT char *RestCatalogHost;
Expand All @@ -34,6 +35,37 @@ extern char *RestCatalogScope;
extern int RestCatalogAuthType;
extern bool RestCatalogEnableVendedCredentials;

/*
* Resolved REST catalog connection options. All REST catalogs --
* built-in ('rest') and user-created (CREATE SERVER ... FOREIGN DATA
* WRAPPER iceberg_catalog) -- are backed by a real pg_foreign_server
* row; ApplyGUCDefaults populates the defaults, ApplyServerOptionOverrides
* layers on any per-server options.
*
* The canonical identity of a catalog is `serverOid` (the OID of the
* iceberg_catalog server row). Use it for in-memory equality, token
* cache keys, and syscache-driven invalidation. `catalog` stores the
* user-visible short name (e.g. 'rest', 'my_polaris') purely for error
* messages.
*/
typedef struct RestCatalogOptions
{
Oid serverOid; /* iceberg_catalog server OID; canonical
* identity, never InvalidOid for resolved
* opts */
char *catalog; /* short user-facing name; used in error
* messages, never for equality */
char *host;
char *oauthHostPath;
char *clientId;
char *clientSecret;
char *scope;
char *locationPrefix;
char *catalogName; /* REST API catalog prefix; defaults to dbname */
int authType;
bool enableVendedCredentials;
} RestCatalogOptions;

#define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens"

#define REST_CATALOG_NAMESPACE_NAME "%s/api/catalog/v1/%s/namespaces/%s"
Expand Down Expand Up @@ -77,24 +109,32 @@ typedef struct RestCatalogRequest
#define REST_CATALOG_AUTH_TOKEN_PATH "%s/api/catalog/v1/oauth/tokens"
#define GET_REST_CATALOG_METADATA_LOCATION "%s/api/catalog/v1/%s/namespaces/%s/tables/%s"

extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(const char *catalogName, const char *namespaceName);
/* Catalog options resolution */
extern PGDLLEXPORT RestCatalogOptions * ResolveRestCatalogOptions(const char *catalog);
extern PGDLLEXPORT RestCatalogOptions * GetRestCatalogOptionsForRelation(Oid relationId);
extern PGDLLEXPORT RestCatalogOptions * CopyRestCatalogOptions(MemoryContext dst, const RestCatalogOptions * src);

extern PGDLLEXPORT void RegisterNamespaceToRestCatalog(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName);
extern PGDLLEXPORT void StartStageRestCatalogIcebergTableCreate(Oid relationId);
extern PGDLLEXPORT char *FinishStageRestCatalogIcebergTableCreateRestRequest(Oid relationId, DataFileSchema * dataFileSchema, List *partitionSpecs);
extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(const char *catalogName, const char *namespaceName);
extern PGDLLEXPORT void ErrorIfRestNamespaceDoesNotExist(RestCatalogOptions * opts, const char *catalogName, const char *namespaceName);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isnt options enough as parameter? we might add namespace into the struct as well.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opts->catalogName holds the server-level catalog name, but here the catalogName comes from the table option (or get_database_name))
similarly namespace is kind of a table-level option

extern PGDLLEXPORT char *GetRestCatalogName(Oid relationId);
extern PGDLLEXPORT char *GetRestCatalogNamespace(Oid relationId);
extern PGDLLEXPORT char *GetRestCatalogTableName(Oid relationId);
extern PGDLLEXPORT bool IsReadOnlyRestCatalogIcebergTable(Oid relationId);
extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(const char *restCatalogName, const char *namespaceName,
extern PGDLLEXPORT char *GetMetadataLocationFromRestCatalog(RestCatalogOptions * opts, const char *restCatalogName, const char *namespaceName,
const char *relationName);
extern PGDLLEXPORT char *GetMetadataLocationForRestCatalogForIcebergTable(Oid relationId);
extern PGDLLEXPORT void ReportHTTPError(HttpResult httpResult, int level);
extern PGDLLEXPORT List *PostHeadersWithAuth(void);
extern PGDLLEXPORT List *DeleteHeadersWithAuth(void);
extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(HttpMethod method, const char *url, const char *body, List *headers);
extern PGDLLEXPORT List *PostHeadersWithAuth(RestCatalogOptions * opts);
extern PGDLLEXPORT List *DeleteHeadersWithAuth(RestCatalogOptions * opts);
extern PGDLLEXPORT HttpResult SendRequestToRestCatalog(RestCatalogOptions * opts, HttpMethod method, const char *url, const char *body, List *headers);
extern PGDLLEXPORT RestCatalogRequest * GetAddSnapshotCatalogRequest(IcebergSnapshot * newSnapshot, Oid relationId);
extern PGDLLEXPORT RestCatalogRequest * GetAddSchemaCatalogRequest(Oid relationId, DataFileSchema * dataFileSchema);
extern PGDLLEXPORT RestCatalogRequest * GetSetCurrentSchemaCatalogRequest(Oid relationId, int32_t schemaId);
extern PGDLLEXPORT RestCatalogRequest * GetAddPartitionCatalogRequest(Oid relationId, List *partitionSpec);
extern PGDLLEXPORT RestCatalogRequest * GetSetPartitionDefaultIdCatalogRequest(Oid relationId, int specId);
extern PGDLLEXPORT RestCatalogRequest * GetRemoveSnapshotCatalogRequest(List *removedSnapshotIds, Oid relationId);

/* ProcessUtility handler for iceberg_catalog server DDL validation */
extern PGDLLEXPORT bool ValidateIcebergCatalogServerDDL(ProcessUtilityParams * processUtilityParams, void *arg);
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the function is in pg_lake_iceberg, but we register in pg_lake_table, why is so? Can't we register in pg_lake_iceberg?

I think the functionality will be equivalent, but this might confuse future readers.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sfc-gh-abozkurt could you also share your input here

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On second thought, I'll move the registration to pg_lake_iceberg. For handlers acting on pg_lake tables makes sense to register them in pg_lake_table, but this handler acts on the iceberg_catalog FDW that pg_lake_iceberg itself owns.

1 change: 0 additions & 1 deletion pg_lake_iceberg/pg_lake_iceberg--3.2--3.3.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,3 @@ CREATE OR REPLACE VIEW pg_catalog.iceberg_tables AS
SELECT catalog_name, table_namespace, table_name, metadata_location, previous_metadata_location
FROM lake_iceberg.tables
WHERE metadata_location IS NOT NULL;

81 changes: 81 additions & 0 deletions pg_lake_iceberg/pg_lake_iceberg--3.3--3.4.sql
Original file line number Diff line number Diff line change
@@ -1 +1,82 @@
-- Upgrade script for pg_lake_iceberg from 3.3 to 3.4

/*
* iceberg_catalog foreign data wrapper: allows defining named catalog
* configurations via CREATE SERVER so that users are not limited to a
* single global REST catalog configured through GUC settings.
*
* Example:
* CREATE SERVER my_polaris TYPE 'rest'
* FOREIGN DATA WRAPPER iceberg_catalog
* OPTIONS (rest_endpoint 'http://polaris:8181',
* rest_auth_type 'default',
* client_id '...',
* client_secret '...');
*
* CREATE TABLE t (a int) USING iceberg WITH (catalog = 'my_polaris');
*/
CREATE FUNCTION lake_iceberg.iceberg_catalog_validator(text[], oid)
RETURNS void
AS 'MODULE_PATHNAME'
LANGUAGE C STRICT;

CREATE FOREIGN DATA WRAPPER iceberg_catalog
NO HANDLER
VALIDATOR lake_iceberg.iceberg_catalog_validator;

GRANT USAGE ON FOREIGN DATA WRAPPER iceberg_catalog TO lake_write;

/*
* Built-in catalog servers.
*
* These three servers are pre-created as structural anchors for the
* pg_depend dependency edges that iceberg tables record against their
* catalog server. They are extension-owned and immutable: ALTER, DROP,
* RENAME, and OWNER changes on them are all blocked. Configuration
* for the built-in catalogs lives in GUCs, not in server options.
*
* Users keep typing the short names ('postgres', 'object_store', 'rest')
* as the catalog= option value on CREATE TABLE; ResolveCatalogServerName
* maps short -> long at server lookup time. The long names are prefixed
* so they cannot collide with names users may already have in their
* databases (e.g. a postgres_fdw server literally named 'postgres').
*
* Pre-flight: error early with a clear hint if any of the long names is
* already in use. This prevents a confusing "server already exists"
* mid-upgrade.
*/
DO $do$
DECLARE
conflicting text;
BEGIN
SELECT srvname INTO conflicting
FROM pg_foreign_server
WHERE srvname IN ('pg_lake_postgres_catalog',
'pg_lake_object_store_catalog',
'pg_lake_rest_catalog')
LIMIT 1;

IF conflicting IS NOT NULL THEN
RAISE EXCEPTION
'pg_lake_iceberg upgrade conflicts with existing foreign server %', conflicting
USING HINT = 'Drop or rename the server and re-run ALTER EXTENSION pg_lake_iceberg UPDATE. '
'pg_lake_iceberg reserves the names pg_lake_postgres_catalog, '
'pg_lake_object_store_catalog, and pg_lake_rest_catalog for internal use.';
END IF;
END $do$;

CREATE SERVER pg_lake_postgres_catalog
TYPE 'postgres'
FOREIGN DATA WRAPPER iceberg_catalog;

CREATE SERVER pg_lake_object_store_catalog
TYPE 'object_store'
FOREIGN DATA WRAPPER iceberg_catalog;

CREATE SERVER pg_lake_rest_catalog
TYPE 'rest'
FOREIGN DATA WRAPPER iceberg_catalog;

GRANT USAGE ON FOREIGN SERVER pg_lake_postgres_catalog TO lake_write;
GRANT USAGE ON FOREIGN SERVER pg_lake_object_store_catalog TO lake_write;
GRANT USAGE ON FOREIGN SERVER pg_lake_rest_catalog TO lake_write;
Loading