-
Notifications
You must be signed in to change notification settings - Fork 105
Enum type support #193
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Enum type support #193
Changes from all commits
1e77b66
22aae7b
bda3e7f
3cdef4e
5672b48
95ad243
161e52a
5eab0b5
1eb2d28
d2f1bb0
0c216c0
9eed6e6
bb889f9
bdfe799
b46c909
1ad91e7
bd7d698
0ef8cc2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#pragma once | ||
|
||
#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp" | ||
#include "duckdb/parser/parsed_data/create_type_info.hpp" | ||
#include "duckdb/storage/table_storage_info.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "utils/snapshot.h" | ||
#include "postgres.h" | ||
#include "catalog/namespace.h" | ||
#include "catalog/pg_class.h" | ||
#include "optimizer/planmain.h" | ||
#include "optimizer/planner.h" | ||
#include "utils/builtins.h" | ||
#include "utils/regproc.h" | ||
#include "utils/snapmgr.h" | ||
#include "utils/syscache.h" | ||
#include "access/htup_details.h" | ||
} | ||
|
||
namespace duckdb { | ||
|
||
class PostgresType : public TypeCatalogEntry { | ||
public: | ||
~PostgresType() { | ||
} | ||
PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info); | ||
}; | ||
|
||
} // namespace duckdb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
DisableFormat: true | ||
SortIncludes: false |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,57 @@ | ||||||||||
#pragma once | ||||||||||
|
||||||||||
#include "duckdb/common/extra_type_info.hpp" | ||||||||||
#include "duckdb/common/vector.hpp" | ||||||||||
#include "duckdb/common/types/vector.hpp" | ||||||||||
#include "duckdb/common/string_map_set.hpp" | ||||||||||
#include "duckdb/common/serializer/deserializer.hpp" | ||||||||||
|
||||||||||
// This is copied directly without changes from 'duckdb/src/common/extra_type_info.cpp' | ||||||||||
// The reason this is copied is to be able to inherit from it. | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||
|
||||||||||
namespace duckdb { | ||||||||||
template <class T> | ||||||||||
struct EnumTypeInfoTemplated : public EnumTypeInfo { | ||||||||||
explicit EnumTypeInfoTemplated(Vector &values_insert_order_p, idx_t size_p) | ||||||||||
: EnumTypeInfo(values_insert_order_p, size_p) { | ||||||||||
D_ASSERT(values_insert_order_p.GetType().InternalType() == PhysicalType::VARCHAR); | ||||||||||
|
||||||||||
UnifiedVectorFormat vdata; | ||||||||||
values_insert_order.ToUnifiedFormat(size_p, vdata); | ||||||||||
|
||||||||||
auto data = UnifiedVectorFormat::GetData<string_t>(vdata); | ||||||||||
for (idx_t i = 0; i < size_p; i++) { | ||||||||||
auto idx = vdata.sel->get_index(i); | ||||||||||
if (!vdata.validity.RowIsValid(idx)) { | ||||||||||
throw InternalException("Attempted to create ENUM type with NULL value"); | ||||||||||
} | ||||||||||
if (values.count(data[idx]) > 0) { | ||||||||||
throw InvalidInputException("Attempted to create ENUM type with duplicate value %s", | ||||||||||
data[idx].GetString()); | ||||||||||
} | ||||||||||
values[data[idx]] = UnsafeNumericCast<T>(i); | ||||||||||
} | ||||||||||
} | ||||||||||
|
||||||||||
static shared_ptr<EnumTypeInfoTemplated> Deserialize(Deserializer &deserializer, uint32_t size) { | ||||||||||
Vector values_insert_order(LogicalType::VARCHAR, size); | ||||||||||
auto strings = FlatVector::GetData<string_t>(values_insert_order); | ||||||||||
|
||||||||||
deserializer.ReadList(201, "values", [&](Deserializer::List &list, idx_t i) { | ||||||||||
strings[i] = StringVector::AddStringOrBlob(values_insert_order, list.ReadElement<string>()); | ||||||||||
}); | ||||||||||
return make_shared_ptr<EnumTypeInfoTemplated>(values_insert_order, size); | ||||||||||
} | ||||||||||
|
||||||||||
const string_map_t<T> &GetValues() const { | ||||||||||
return values; | ||||||||||
} | ||||||||||
|
||||||||||
EnumTypeInfoTemplated(const EnumTypeInfoTemplated &) = delete; | ||||||||||
EnumTypeInfoTemplated &operator=(const EnumTypeInfoTemplated &) = delete; | ||||||||||
|
||||||||||
private: | ||||||||||
string_map_t<T> values; | ||||||||||
}; | ||||||||||
|
||||||||||
} // namespace duckdb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
#pragma once | ||
|
||
#include "pgduckdb/duckdb_vendor/enum_type_info_templated.hpp" | ||
#include "duckdb/common/types/vector.hpp" | ||
#include "duckdb/common/types.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "catalog/pg_enum.h" | ||
#include "catalog/pg_type.h" | ||
#include "utils/syscache.h" | ||
#include "access/htup_details.h" | ||
} | ||
|
||
namespace pgduckdb { | ||
|
||
using duckdb::idx_t; | ||
using duckdb::LogicalType; | ||
using duckdb::Vector; | ||
|
||
// To store additional metadata for a type, DuckDB's LogicalType class contains a 'type_info_' field. | ||
// The type of this is ExtraTypeInfo, ENUMs make use of this in the form of EnumTypeInfo (see | ||
// duckdb/include/common/extra_type_info.hpp). We derive from this further to store the ENUMs connection with the oids | ||
// of Postgres' enum members. | ||
Comment on lines
+21
to
+24
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a super helpful comment, thanks a lot. |
||
|
||
template <class T> | ||
Tishj marked this conversation as resolved.
Show resolved
Hide resolved
|
||
class PGDuckDBEnumTypeInfo : public duckdb::EnumTypeInfoTemplated<T> { | ||
public: | ||
PGDuckDBEnumTypeInfo(Vector &values_insert_order_p, idx_t dict_size_p, Vector &enum_member_oids) | ||
: duckdb::EnumTypeInfoTemplated<T>(values_insert_order_p, dict_size_p), enum_member_oids(enum_member_oids) { | ||
} | ||
|
||
public: | ||
const Vector & | ||
GetMemberOids() const { | ||
return enum_member_oids; | ||
} | ||
|
||
duckdb::shared_ptr<duckdb::ExtraTypeInfo> | ||
Copy() const override { | ||
auto &insert_order = this->GetValuesInsertOrder(); | ||
Vector values_insert_order_copy(LogicalType::VARCHAR, false, false, 0); | ||
values_insert_order_copy.Reference(insert_order); | ||
|
||
Vector enum_member_oids_copy(LogicalType::UINTEGER, false, false, 0); | ||
enum_member_oids_copy.Reference(enum_member_oids); | ||
|
||
return duckdb::make_shared_ptr<PGDuckDBEnumTypeInfo>(values_insert_order_copy, this->GetDictSize(), | ||
enum_member_oids_copy); | ||
} | ||
|
||
private: | ||
Vector enum_member_oids; | ||
}; | ||
|
||
struct PGDuckDBEnum { | ||
Tishj marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be changed to a namespace? Now that this struct doesn't have any fields anymore? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea possibly, in our codebase we're not too strict on static-membered-structs vs namespaces, I think when we also need a constexpr value here we can't use a namespace but can use a struct, so I almost always go for using a struct |
||
static LogicalType CreateEnumType(std::vector<HeapTuple> &enum_members); | ||
static idx_t GetDuckDBEnumPosition(duckdb::Value &val); | ||
static idx_t GetEnumPosition(Datum enum_member_oid, const duckdb::LogicalType &type); | ||
static bool IsEnumType(Oid type_oid); | ||
static Oid GetEnumTypeOid(const Vector &oids); | ||
static const Vector &GetMemberOids(const duckdb::LogicalType &type); | ||
}; | ||
|
||
} // namespace pgduckdb |
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -1,15 +1,19 @@ | ||||||||
#include "pgduckdb/catalog/pgduckdb_catalog.hpp" | ||||||||
#include "pgduckdb/catalog/pgduckdb_transaction.hpp" | ||||||||
#include "pgduckdb/catalog/pgduckdb_table.hpp" | ||||||||
#include "pgduckdb/catalog/pgduckdb_type.hpp" | ||||||||
#include "pgduckdb/pgduckdb_types.hpp" | ||||||||
#include "pgduckdb/scan/postgres_scan.hpp" | ||||||||
#include "duckdb/parser/parsed_data/create_table_info.hpp" | ||||||||
#include "duckdb/parser/parsed_data/create_type_info.hpp" | ||||||||
#include "duckdb/parser/parsed_data/create_schema_info.hpp" | ||||||||
#include "duckdb/catalog/catalog.hpp" | ||||||||
|
||||||||
extern "C" { | ||||||||
#include "postgres.h" | ||||||||
#include "catalog/namespace.h" | ||||||||
#include "catalog/pg_class.h" | ||||||||
#include "catalog/pg_type.h" | ||||||||
#include "optimizer/planmain.h" | ||||||||
#include "optimizer/planner.h" | ||||||||
#include "utils/builtins.h" | ||||||||
|
@@ -23,6 +27,7 @@ extern "C" { | |||||||
#include "nodes/makefuncs.h" | ||||||||
#include "nodes/nodeFuncs.h" | ||||||||
#include "parser/parsetree.h" | ||||||||
#include "parser/parse_type.h" | ||||||||
#include "utils/rel.h" | ||||||||
} | ||||||||
|
||||||||
|
@@ -68,6 +73,56 @@ IsIndexScan(const Path *nodePath) { | |||||||
return false; | ||||||||
} | ||||||||
|
||||||||
optional_ptr<CatalogEntry> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's add a little comment about the assumptions of this function (feel free to correct this comment if my assumptions are incorrect)
Suggested change
|
||||||||
SchemaItems::GetType(const string &entry_name) { | ||||||||
auto it = types.find(entry_name); | ||||||||
if (it != types.end()) { | ||||||||
return it->second.get(); | ||||||||
} | ||||||||
|
||||||||
auto &catalog = schema->catalog; | ||||||||
|
||||||||
List *name_list = NIL; | ||||||||
name_list = lappend(name_list, makeString(pstrdup(name.c_str()))); | ||||||||
name_list = lappend(name_list, makeString(pstrdup(entry_name.c_str()))); | ||||||||
|
||||||||
TypeName *type_name = makeTypeNameFromNameList(name_list); | ||||||||
|
||||||||
// Try to look up the type by name | ||||||||
Oid type_oid = InvalidOid; | ||||||||
HeapTuple type_entry = LookupTypeName(NULL, type_name, NULL, false); | ||||||||
|
||||||||
if (HeapTupleIsValid(type_entry)) { | ||||||||
// Cast to the correct form to access the fields | ||||||||
Form_pg_type type_form = (Form_pg_type)GETSTRUCT(type_entry); | ||||||||
|
||||||||
// Extract the OID from the type entry | ||||||||
type_oid = type_form->oid; | ||||||||
|
||||||||
// Release the system cache entry | ||||||||
ReleaseSysCache(type_entry); | ||||||||
} | ||||||||
|
||||||||
// If the type could not be found, handle it here | ||||||||
if (type_oid == InvalidOid) { | ||||||||
return nullptr; | ||||||||
} | ||||||||
|
||||||||
FormData_pg_attribute dummy_attr; | ||||||||
dummy_attr.atttypid = type_oid; | ||||||||
dummy_attr.atttypmod = 0; | ||||||||
dummy_attr.attndims = 0; | ||||||||
Comment on lines
+113
to
+114
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems incorrect if e.g. it's an array an of enum. I think it would be good to add a test for that, it doesn't have to actually work but at least it should throw a somewhat clear error and not crash. Also I think it would be good to check here for now that the type is an enum, because that's really the only kind of type that we're comfortable with handling here. |
||||||||
Form_pg_attribute attribute = &dummy_attr; | ||||||||
|
||||||||
CreateTypeInfo info; | ||||||||
info.name = entry_name; | ||||||||
info.type = pgduckdb::ConvertPostgresToDuckColumnType(attribute); | ||||||||
|
||||||||
auto type = make_uniq<PostgresType>(catalog, *schema, info); | ||||||||
types[entry_name] = std::move(type); | ||||||||
return types[entry_name].get(); | ||||||||
} | ||||||||
|
||||||||
optional_ptr<CatalogEntry> | ||||||||
SchemaItems::GetTable(const string &entry_name, PlannerInfo *planner_info) { | ||||||||
auto it = tables.find(entry_name); | ||||||||
|
@@ -157,23 +212,27 @@ PostgresTransaction::GetCatalogEntry(CatalogType type, const string &schema, con | |||||||
throw InternalException("Could not find 'postgres_state' in 'PostgresTransaction::GetCatalogEntry'"); | ||||||||
} | ||||||||
auto planner_info = scan_data->m_query_planner_info; | ||||||||
if (type == CatalogType::SCHEMA_ENTRY) { | ||||||||
return GetSchema(schema); | ||||||||
} | ||||||||
|
||||||||
auto it = schemas.find(schema); | ||||||||
if (it == schemas.end()) { | ||||||||
return nullptr; | ||||||||
} | ||||||||
auto &schema_entry = it->second; | ||||||||
|
||||||||
switch (type) { | ||||||||
case CatalogType::TABLE_ENTRY: { | ||||||||
auto it = schemas.find(schema); | ||||||||
if (it == schemas.end()) { | ||||||||
return nullptr; | ||||||||
} | ||||||||
auto &schema_entry = it->second; | ||||||||
return schema_entry.GetTable(name, planner_info); | ||||||||
} | ||||||||
case CatalogType::SCHEMA_ENTRY: { | ||||||||
return GetSchema(schema); | ||||||||
case CatalogType::TYPE_ENTRY: { | ||||||||
return schema_entry.GetType(name); | ||||||||
} | ||||||||
default: | ||||||||
D_ASSERT(type != CatalogType::SCHEMA_ENTRY); | ||||||||
return nullptr; | ||||||||
} | ||||||||
} | ||||||||
|
||||||||
} // namespace duckdb | ||||||||
|
||||||||
// namespace duckdb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#include "pgduckdb/pgduckdb_types.hpp" | ||
#include "pgduckdb/catalog/pgduckdb_schema.hpp" | ||
#include "pgduckdb/catalog/pgduckdb_type.hpp" | ||
|
||
extern "C" { | ||
#include "postgres.h" | ||
#include "access/tableam.h" | ||
#include "access/heapam.h" | ||
#include "storage/bufmgr.h" | ||
#include "catalog/namespace.h" | ||
#include "catalog/pg_class.h" | ||
#include "optimizer/planmain.h" | ||
#include "optimizer/planner.h" | ||
#include "utils/builtins.h" | ||
#include "utils/regproc.h" | ||
#include "utils/snapmgr.h" | ||
#include "utils/syscache.h" | ||
#include "access/htup_details.h" | ||
#include "parser/parsetree.h" | ||
} | ||
|
||
namespace duckdb { | ||
|
||
PostgresType::PostgresType(Catalog &catalog, SchemaCatalogEntry &schema, CreateTypeInfo &info) | ||
: TypeCatalogEntry(catalog, schema, info) { | ||
} | ||
|
||
} // namespace duckdb |
Uh oh!
There was an error while loading. Please reload this page.