Skip to content

Commit 374d30a

Browse files
Refactor IcebergPartitionTransform
IcebergPartitionTransform can embed an IcebergPartitionSpecField in place of several individual fields, which makes it less verbose. Signed-off-by: Aykut Bozkurt <aykut.bozkurt@snowflake.com>
1 parent 455c4f2 commit 374d30a

7 files changed

Lines changed: 19 additions & 44 deletions

File tree

pg_lake_engine/src/data_file/data_file_stats.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ ExtractMinMaxForColumn(Datum map, const char *colName, List **names, List **mins
271271

272272
if (minText != NULL && maxText != NULL)
273273
{
274-
*names = lappend(*names, colName);
274+
*names = lappend(*names, pstrdup(colName));
275275
*mins = lappend(*mins, minText);
276276
*maxs = lappend(*maxs, maxText);
277277
}

pg_lake_iceberg/include/pg_lake/iceberg/api/partitioning.h

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
#include "postgres.h"
2121

22+
#include "pg_lake/iceberg/metadata_spec.h"
2223
#include "pg_lake/parquet/field.h"
2324
#include "pg_lake/pgduck/type.h"
2425
#include "access/attnum.h"
@@ -51,14 +52,7 @@ typedef struct IcebergPartitionTransform
5152
size_t truncateLen;
5253
};
5354

54-
/* partition field id */
55-
int32_t partitionFieldId;
56-
57-
/* <columnName>_<transformName>, e.g. a_bucket */
58-
const char *partitionFieldName;
59-
60-
/* transform name, e.g. bucket[3] */
61-
const char *transformName;
55+
IcebergPartitionSpecField *specField;
6256

6357
/* source field of the column to which transform applies */
6458
DataFileSchemaField *sourceField;

pg_lake_iceberg/src/iceberg/partitioning/partition.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ FindPartitionTransformById(List *transforms, int32_t partitionFieldId, bool erro
225225
{
226226
IcebergPartitionTransform *transform = (IcebergPartitionTransform *) lfirst(cell);
227227

228-
if (transform->partitionFieldId == partitionFieldId)
228+
if (transform->specField->field_id == partitionFieldId)
229229
return transform;
230230
}
231231

pg_lake_iceberg/src/iceberg/partitioning/spec_generation.c

Lines changed: 1 addition & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -60,27 +60,7 @@ BuildPartitionSpecFromPartitionTransforms(Oid relationId, List *partitionTransfo
6060
{
6161
IcebergPartitionTransform *transform = lfirst(transformCell);
6262

63-
IcebergPartitionSpecField *field = palloc0(sizeof(IcebergPartitionSpecField));
64-
65-
field->source_id = transform->sourceField->id;
66-
67-
/*
68-
* We do not support partition transforms on multi columns (v3
69-
* feature), and to comply with the iceberg spec/reference
70-
* implementation for v2, we still fill the source_ids array.
71-
*/
72-
field->source_ids_length = 1;
73-
field->source_ids = palloc0(sizeof(int) * field->source_ids_length);
74-
field->source_ids[0] = transform->sourceField->id;
75-
76-
field->field_id = transform->partitionFieldId;
77-
78-
field->name = pstrdup(transform->partitionFieldName);
79-
field->name_length = strlen(transform->partitionFieldName);
80-
field->transform = pstrdup(transform->transformName);
81-
field->transform_length = strlen(transform->transformName);
82-
83-
spec->fields[fieldIndex] = *field;
63+
spec->fields[fieldIndex] = *(transform->specField);
8464
fieldIndex++;
8565
}
8666

pg_lake_table/include/pg_lake/partitioning/partition_spec_catalog.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ typedef struct IcebergPartitionSpecHashEntry
4343
extern void UpdateDefaultPartitionSpecId(Oid relationId, int specId);
4444
extern void InsertPartitionSpecAndPartitionFields(Oid relationId, IcebergPartitionSpec * spec);
4545
extern int GetLargestSpecId(Oid relationId);
46-
extern List *GetAllIcebergPartitionSpecIds(Oid relationId);
4746
extern PGDLLEXPORT int GetCurrentSpecId(Oid relationId);
4847
extern int GetLargestPartitionFieldId(Oid relationId);
4948
extern IcebergPartitionSpecField * GetIcebergPartitionFieldFromCatalog(Oid relationId, int fieldId);

pg_lake_table/src/fdw/partition_transform.c

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ PartitionTransformsEqual(IcebergPartitionSpec * spec, List *partitionTransforms)
162162
* Iceberg does here:
163163
* https://github.com/apache/iceberg/blob/8b55ac834015ce664f879ecfe1e80a941a994420/api/src/main/java/org/apache/iceberg/PartitionSpec.java#L239-L259
164164
*/
165-
if (strcasecmp(specField->name, transform->partitionFieldName) != 0)
165+
if (strcasecmp(specField->name, transform->specField->name) != 0)
166166
{
167167
return false;
168168
}
@@ -251,9 +251,7 @@ GetPartitionTransformFromSpecField(Oid relationId, IcebergPartitionSpecField * s
251251
{
252252
IcebergPartitionTransform *transform = palloc0(sizeof(IcebergPartitionTransform));
253253

254-
transform->partitionFieldId = specField->field_id;
255-
transform->partitionFieldName = pstrdup(specField->name);
256-
transform->transformName = pstrdup(specField->transform);
254+
transform->specField = specField;
257255

258256
transform->attnum =
259257
GetAttributeForFieldId(relationId, specField->source_id);
@@ -274,7 +272,7 @@ GetPartitionTransformFromSpecField(Oid relationId, IcebergPartitionSpecField * s
274272
}
275273

276274
/* parse transform name */
277-
ParseTransformName(transform->transformName,
275+
ParseTransformName(transform->specField->transform,
278276
&transform->type,
279277
&transform->bucketCount,
280278
&transform->truncateLen);
@@ -413,8 +411,8 @@ ApplyPartitionTransformToTuple(IcebergPartitionTransform * transform, TupleTable
413411
{
414412
PartitionField *field = palloc0(sizeof(PartitionField));
415413

416-
field->field_name = pstrdup(transform->partitionFieldName);
417-
field->field_id = transform->partitionFieldId;
414+
field->field_name = pstrdup(transform->specField->name);
415+
field->field_id = transform->specField->field_id;
418416

419417
bool isNull = false;
420418
Datum columnValue = slot_getattr(slot, transform->attnum, &isNull);
@@ -453,7 +451,7 @@ ApplyPartitionTransformToTuple(IcebergPartitionTransform * transform, TupleTable
453451
ereport(ERROR,
454452
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
455453
errmsg("applying transform %s is not yet support ",
456-
transform->transformName)));
454+
transform->specField->transform)));
457455
}
458456

459457
field->value_type = GetTransformResultAvroType(transform);

pg_lake_table/src/fdw/partitioning/partition_by_parser.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -529,14 +529,18 @@ AnalyzeIcebergTablePartitionBy(Oid relationId, List *transforms)
529529

530530
transform->sourceField = sourceField;
531531

532+
transform->specField = palloc0(sizeof(IcebergPartitionSpecField));
533+
532534
/* set transform name */
533-
transform->transformName = GenerateTransformName(transform);
535+
transform->specField->transform = GenerateTransformName(transform);
536+
transform->specField->transform_length = strlen(transform->specField->transform);
534537

535538
/* set partition field name */
536-
transform->partitionFieldName = GeneratePartitionFieldName(transform, relationId);
539+
transform->specField->name = GeneratePartitionFieldName(transform, relationId);
540+
transform->specField->name_length = strlen(transform->specField->name);
537541

538542
/* set partition field id */
539-
transform->partitionFieldId = ++largestPartitionFieldId;
543+
transform->specField->field_id = ++largestPartitionFieldId;
540544

541545
/* 3) Check column type compatibility. */
542546
EnsureValidTypeForTransform(transform->type, transform->pgType.postgresTypeOid);
@@ -822,7 +826,7 @@ EnsureNoDuplicateTransforms(List *transforms)
822826
ereport(ERROR,
823827
(errcode(ERRCODE_DUPLICATE_OBJECT),
824828
errmsg("\"%s\" transform on column \"%s\" appears multiple times in partition spec",
825-
transform->transformName, transform->columnName)));
829+
transform->specField->transform, transform->columnName)));
826830
}
827831
}
828832
}

0 commit comments

Comments
 (0)