@@ -72,7 +72,10 @@ func Take(ctx context.Context, r arrow.Record, indices *array.Int32) (arrow.Reco
7272 // does not have these columns.
7373 var customTake bool
7474 for i := 0 ; i < int (r .NumCols ()); i ++ {
75- if r .Column (i ).DataType ().ID () == arrow .DICTIONARY || r .Column (i ).DataType ().ID () == arrow .LIST {
75+ if r .Column (i ).DataType ().ID () == arrow .DICTIONARY ||
76+ r .Column (i ).DataType ().ID () == arrow .RUN_END_ENCODED ||
77+ r .Column (i ).DataType ().ID () == arrow .LIST ||
78+ r .Column (i ).DataType ().ID () == arrow .STRUCT {
7679 customTake = true
7780 break
7881 }
@@ -108,8 +111,12 @@ func Take(ctx context.Context, r arrow.Record, indices *array.Int32) (arrow.Reco
108111 switch arr := r .Column (i ).(type ) {
109112 case * array.Dictionary :
110113 g .Go (func () error { return TakeDictColumn (ctx , arr , i , resArr , indices ) })
114+ case * array.RunEndEncoded :
115+ g .Go (func () error { return TakeRunEndEncodedColumn (ctx , arr , i , resArr , indices ) })
111116 case * array.List :
112117 g .Go (func () error { return TakeListColumn (ctx , arr , i , resArr , indices ) })
118+ case * array.Struct :
119+ g .Go (func () error { return TakeStructColumn (ctx , arr , i , resArr , indices ) })
113120 default :
114121 g .Go (func () error { return TakeColumn (ctx , col , i , resArr , indices ) })
115122 }
@@ -140,22 +147,91 @@ func TakeColumn(ctx context.Context, a arrow.Array, idx int, arr []arrow.Array,
140147}
141148
142149func TakeDictColumn (ctx context.Context , a * array.Dictionary , idx int , arr []arrow.Array , indices * array.Int32 ) error {
143- r := array .NewDictionaryBuilderWithDict (
144- compute .GetAllocator (ctx ), a .DataType ().(* arrow.DictionaryType ), a .Dictionary (),
145- ).(* array.BinaryDictionaryBuilder )
146- defer r .Release ()
150+ switch a .Dictionary ().(type ) {
151+ case * array.String , * array.Binary :
152+ r := array .NewDictionaryBuilderWithDict (
153+ compute .GetAllocator (ctx ), a .DataType ().(* arrow.DictionaryType ), a .Dictionary (),
154+ ).(* array.BinaryDictionaryBuilder )
155+ defer r .Release ()
156+
157+ r .Reserve (indices .Len ())
158+ idxBuilder := r .IndexBuilder ()
159+ for _ , i := range indices .Int32Values () {
160+ if a .IsNull (int (i )) {
161+ r .AppendNull ()
162+ continue
163+ }
164+ idxBuilder .Append (a .GetValueIndex (int (i )))
165+ }
147166
148- r .Reserve (indices .Len ())
149- idxBuilder := r .IndexBuilder ()
150- for _ , i := range indices .Int32Values () {
151- if a .IsNull (int (i )) {
152- r .AppendNull ()
167+ arr [idx ] = r .NewArray ()
168+ return nil
169+ case * array.FixedSizeBinary :
170+ r := array .NewDictionaryBuilderWithDict (
171+ compute .GetAllocator (ctx ), a .DataType ().(* arrow.DictionaryType ), a .Dictionary (),
172+ ).(* array.FixedSizeBinaryDictionaryBuilder )
173+ defer r .Release ()
174+
175+ r .Reserve (indices .Len ())
176+ idxBuilder := r .IndexBuilder ()
177+ for _ , i := range indices .Int32Values () {
178+ if a .IsNull (int (i )) {
179+ r .AppendNull ()
180+ continue
181+ }
182+ // TODO: Improve this by not copying actual values.
183+ idxBuilder .Append (a .GetValueIndex (int (i )))
184+ }
185+
186+ arr [idx ] = r .NewArray ()
187+ return nil
188+ }
189+
190+ return nil
191+ }
192+
193+ func TakeRunEndEncodedColumn (ctx context.Context , a * array.RunEndEncoded , idx int , arr []arrow.Array , indices * array.Int32 ) error {
194+ expandedIndexBuilder := array .NewInt32Builder (compute .GetAllocator (ctx ))
195+ defer expandedIndexBuilder .Release ()
196+
197+ dict := a .Values ().(* array.Dictionary )
198+ for i := 0 ; i < a .Len (); i ++ {
199+ if dict .IsNull (a .GetPhysicalIndex (i )) {
200+ expandedIndexBuilder .AppendNull ()
201+ } else {
202+ expandedIndexBuilder .Append (int32 (dict .GetValueIndex (a .GetPhysicalIndex (i ))))
203+ }
204+ }
205+ expandedIndex := expandedIndexBuilder .NewInt32Array ()
206+ defer expandedIndex .Release ()
207+
208+ expandedReorderedArr := make ([]arrow.Array , 1 )
209+ if err := TakeColumn (ctx , expandedIndex , 0 , expandedReorderedArr , indices ); err != nil {
210+ return err
211+ }
212+ expandedReordered := expandedReorderedArr [0 ].(* array.Int32 )
213+ defer expandedReordered .Release ()
214+
215+ b := array .NewRunEndEncodedBuilder (
216+ compute .GetAllocator (ctx ), a .RunEndsArr ().DataType (), a .Values ().DataType (),
217+ )
218+ defer b .Release ()
219+ b .Reserve (indices .Len ())
220+
221+ dictValues := dict .Dictionary ().(* array.String )
222+ for i := 0 ; i < expandedReordered .Len (); i ++ {
223+ if expandedReordered .IsNull (i ) {
224+ b .AppendNull ()
153225 continue
154226 }
155- idxBuilder .Append (a .GetValueIndex (int (i )))
227+ reorderedIndex := expandedReordered .Value (i )
228+ v := dictValues .Value (int (reorderedIndex ))
229+ if err := b .AppendValueFromString (v ); err != nil {
230+ return err
231+ }
156232 }
157233
158- arr [idx ] = r . NewArray ()
234+ arr [idx ] = b . NewRunEndEncodedArray ()
159235 return nil
160236}
161237
@@ -165,6 +241,7 @@ func TakeListColumn(ctx context.Context, a *array.List, idx int, arr []arrow.Arr
165241 if ! ok {
166242 return fmt .Errorf ("unexpected value builder type %T for list column" , r .ValueBuilder ())
167243 }
244+ defer valueBuilder .Release ()
168245
169246 listValues := a .ListValues ().(* array.Dictionary )
170247 switch dictV := listValues .Dictionary ().(type ) {
@@ -200,6 +277,54 @@ func TakeListColumn(ctx context.Context, a *array.List, idx int, arr []arrow.Arr
200277 return nil
201278}
202279
280+ func TakeStructColumn (ctx context.Context , a * array.Struct , idx int , arr []arrow.Array , indices * array.Int32 ) error {
281+ aType := a .Data ().DataType ().(* arrow.StructType )
282+
283+ // Immediately, return this struct if it has no fields/columns
284+ if a .NumField () == 0 {
285+ // If the original record is released and this is released once more,
286+ // as usually done, we want to retain it once more.
287+ a .Retain ()
288+ arr [idx ] = a
289+ return nil
290+ }
291+
292+ cols := make ([]arrow.Array , a .NumField ())
293+ names := make ([]string , a .NumField ())
294+ defer func () {
295+ for _ , col := range cols {
296+ if col != nil {
297+ col .Release ()
298+ }
299+ }
300+ }()
301+
302+ for i := 0 ; i < a .NumField (); i ++ {
303+ names [i ] = aType .Field (i ).Name
304+
305+ switch f := a .Field (i ).(type ) {
306+ case * array.RunEndEncoded :
307+ err := TakeRunEndEncodedColumn (ctx , f , i , cols , indices )
308+ if err != nil {
309+ return err
310+ }
311+ default :
312+ err := TakeColumn (ctx , f , i , cols , indices )
313+ if err != nil {
314+ return err
315+ }
316+ }
317+ }
318+
319+ takeStruct , err := array .NewStructArray (cols , names )
320+ if err != nil {
321+ return err
322+ }
323+
324+ arr [idx ] = takeStruct
325+ return nil
326+ }
327+
203328type multiColSorter struct {
204329 indices * builder.OptInt32Builder
205330 comparisons []comparator
@@ -263,13 +388,21 @@ func newMultiColSorter(
263388 },
264389 bytes .Compare ,
265390 )
391+ case * array.FixedSizeBinary :
392+ ms .comparisons [i ] = newOrderedSorter [[]byte ](
393+ & fixedSizeBinaryDictionary {
394+ dict : e ,
395+ elem : elem ,
396+ },
397+ bytes .Compare ,
398+ )
266399 default :
267400 ms .Release ()
268- return nil , fmt .Errorf ("unsupported dictionary column type for sorting %T" , e )
401+ return nil , fmt .Errorf ("unsupported dictionary column type for sorting %T for column %s " , e , r . Schema (). Field ( col . Index ). Name )
269402 }
270403 default :
271404 ms .Release ()
272- return nil , fmt .Errorf ("unsupported column type for sorting %T" , e )
405+ return nil , fmt .Errorf ("unsupported column type for sorting %T for column %s " , e , r . Schema (). Field ( col . Index ). Name )
273406 }
274407 }
275408 return ms , nil
@@ -417,3 +550,16 @@ func (s *binaryDictionary) IsNull(i int) bool {
417550func (s * binaryDictionary ) Value (i int ) []byte {
418551 return s .elem .Value (s .dict .GetValueIndex (i ))
419552}
553+
554+ type fixedSizeBinaryDictionary struct {
555+ dict * array.Dictionary
556+ elem * array.FixedSizeBinary
557+ }
558+
559+ func (s * fixedSizeBinaryDictionary ) IsNull (i int ) bool {
560+ return s .dict .IsNull (i )
561+ }
562+
563+ func (s * fixedSizeBinaryDictionary ) Value (i int ) []byte {
564+ return s .elem .Value (s .dict .GetValueIndex (i ))
565+ }
0 commit comments