@@ -3,6 +3,7 @@ package iceberg
33import (
44 "context"
55 "fmt"
6+ "maps"
67 "regexp"
78 "runtime"
89 "strings"
@@ -101,7 +102,7 @@ func (i *Iceberg) Setup(ctx context.Context, stream types.StreamInterface, globa
101102 logger .Infof ("Creating destination table [%s] in Iceberg database [%s] for stream [%s]" , i .stream .GetDestinationTable (), i .stream .GetDestinationDatabase (& i .config .IcebergDatabase ), i .stream .Name ())
102103
103104 var requestPayload proto.IcebergPayload
104- iceSchema := utils . Ternary ( stream .NormalizationEnabled (), stream . Schema ().ToIceberg (), icebergRawSchema ()).([] * proto. IcebergPayload_SchemaField )
105+ iceSchema := stream .Schema ().ToIceberg (! stream . NormalizationEnabled () )
105106 requestPayload = proto.IcebergPayload {
106107 Type : proto .IcebergPayload_GET_OR_CREATE_TABLE ,
107108 Metadata : & proto.IcebergPayload_Metadata {
@@ -204,7 +205,7 @@ func (i *Iceberg) Check(ctx context.Context) error {
204205 Metadata : & proto.IcebergPayload_Metadata {
205206 ThreadId : server .serverID ,
206207 DestTableName : destinationDB ,
207- Schema : icebergRawSchema (),
208+ Schema : types . GetIcebergRawSchema (),
208209 },
209210 }
210211
@@ -218,8 +219,8 @@ func (i *Iceberg) Check(ctx context.Context) error {
218219
219220 // try writing record in dest table
220221 currentTime := time .Now ().UTC ()
221- protoSchema := icebergRawSchema ()
222- record := types .CreateRawRecord (destinationDB , map [string ]any {"name" : "olake" }, " r" , & currentTime )
222+ protoSchema := types . GetIcebergRawSchema ()
223+ record := types .CreateRawRecord (map [string ]any {"name" : "olake" }, map [ string ] any { constants . OlakeID : "olake" , constants . OpType : " r" , constants . CdcTimestamp : & currentTime } )
223224 protoColumns , err := legacywriter .RawDataColumnBuffer (record , protoSchema )
224225 if err != nil {
225226 return fmt .Errorf ("failed to create raw data column buffer: %s" , err )
@@ -307,20 +308,12 @@ func (i *Iceberg) FlattenAndCleanData(ctx context.Context, records []types.RawRe
307308 // parallel flatten data and detect schema difference
308309 diffThreadSchema := atomic.Bool {}
309310 err := utils .Concurrent (ctx , records , runtime .GOMAXPROCS (0 )* 16 , func (_ context.Context , record types.RawRecord , idx int ) error {
310- // set pre configured fields
311- records [idx ].Data [constants .OlakeID ] = record .OlakeID
312- records [idx ].Data [constants .OlakeTimestamp ] = time .Now ().UTC ()
313- records [idx ].Data [constants .OpType ] = record .OperationType
314- if record .CdcTimestamp != nil {
315- records [idx ].Data [constants .CdcTimestamp ] = * record .CdcTimestamp
316- }
317-
318- flattenedRecord , err := typeutils .NewFlattener ().Flatten (record .Data )
311+ flattenRecord , err := typeutils .NewFlattener ().Flatten (record .Data )
319312 if err != nil {
320313 return fmt .Errorf ("failed to flatten record, iceberg writer: %s" , err )
321314 }
322- records [idx ].Data = flattenedRecord
323-
315+ records [idx ].Data = flattenRecord
316+ maps . Copy ( records [ idx ]. Data , record . OlakeColumns )
324317 // if schema difference is not detected, detect schema difference
325318 if ! diffThreadSchema .Load () {
326319 // when detectChange is true, the function does not modify schema parameter
@@ -368,7 +361,6 @@ func (i *Iceberg) EvolveSchema(ctx context.Context, globalSchema, recordsRawSche
368361 if ! i .stream .NormalizationEnabled () {
369362 return i .schema , nil
370363 }
371-
372364 // cases as local thread schema has detected changes w.r.t. batch records schema
373365 // i. iceberg table already have changes (i.e. no difference with global schema), in this case
374366 // only refresh table in iceberg for this thread.
@@ -582,18 +574,6 @@ func parseSchema(schemaStr string) (map[string]string, error) {
582574 return fields , nil
583575}
584576
585- // returns raw schema in iceberg format
586- func icebergRawSchema () []* proto.IcebergPayload_SchemaField {
587- var icebergFields []* proto.IcebergPayload_SchemaField
588- for key , typ := range types .RawSchema {
589- icebergFields = append (icebergFields , & proto.IcebergPayload_SchemaField {
590- IceType : typ .ToIceberg (),
591- Key : key ,
592- })
593- }
594- return icebergFields
595- }
596-
597577func getCommonAncestorType (d1 , d2 string ) string {
598578 // check for cases:
599579 // d1: string d2: int -> return string
0 commit comments