Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions plan/builders.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,12 @@ type Builder interface {
// Deprecated: Use VirtualTableFromExpr(...).Remap() instead.
VirtualTableFromExprRemap(fieldNames []string, remap []int32, values ...expr.VirtualTableExpressionValue) (*VirtualTableReadRel, error)
VirtualTableFromExpr(fieldNames []string, values ...expr.VirtualTableExpressionValue) (*VirtualTableReadRel, error)
// VirtualTableFromGoTypes constructs a VirtualTableReadRel from native Go types.
// It accepts field names, tuples of polymorphic Go values, and optional nullability
// configuration. The function automatically maps Go types to appropriate Substrait types
// and handles nil values by converting them to typed null literals.
// If nullableColumns is nil, all columns default to non-nullable (required).
VirtualTableFromGoTypes(fieldNames []string, tuples [][]any, nullableColumns []bool) (*VirtualTableReadRel, error)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not one of the approvers (or reviewers) so... take it or not... I'm not really sure that this kind of logic belongs with the builders. Go types can get pretty complex pretty fast; not sure the library itself should carry this complexity.

But even ignoring the above, if maintainers think it's useful (and , I do think its definitely applicable and a common enough task for Go users of this library), then at least I'd encourage not to expand the Builder interface. I think a standalone function should work just fine; And also, we might want to consider adding it to the "literal" package -- it's a utility that constructs virtual scan from literals; maybe that package is a better home for it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey, thanks for the comment. I think that is a great point. I'll look into moving it to another place briefly. Thanks!

Copy link
Member Author

@benbellick benbellick Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved to the literals package 👍

Also, I know that you're not one of the reviewers, but just rerequested you because I want everyone (to the extent possible) to be happy with the change! Thanks

IcebergTableFromMetadataFile(metadataURI string, snapshot IcebergSnapshot, schema types.NamedStruct) (*IcebergTableReadRel, error)
// Deprecated: Use Sort(...).Remap() instead.
SortRemap(input Rel, remap []int32, sorts ...expr.SortField) (*SortRel, error)
Expand Down Expand Up @@ -619,6 +625,214 @@ func (b *builder) VirtualTable(fields []string, values ...expr.StructLiteralValu
return b.VirtualTableRemap(fields, nil, values...)
}

func (b *builder) VirtualTableFromGoTypes(fieldNames []string, tuples [][]any, nullableColumns []bool) (*VirtualTableReadRel, error) {
// Need at least one tuple to infer column types from Go values.
// Empty virtual tables are valid, but require explicit type specification via VirtualTableFromExpr.
if len(tuples) == 0 {
return nil, fmt.Errorf("%w: must provide at least one tuple for virtual table", substraitgo.ErrInvalidRel)
}

nfields := len(fieldNames)
if nfields == 0 {
return nil, fmt.Errorf("%w: must provide at least one field name", substraitgo.ErrInvalidRel)
}

for i, tuple := range tuples {
if len(tuple) != nfields {
return nil, fmt.Errorf("%w: tuple %d has %d values, expected %d", substraitgo.ErrInvalidRel, i, len(tuple), nfields)
}
}

if nullableColumns == nil {
// default behavior is that none of the columns are nullable
nullableColumns = make([]bool, nfields)
} else if len(nullableColumns) != nfields {
return nil, fmt.Errorf("%w: nullableColumns length (%d) must match fieldNames length (%d) or be nil", substraitgo.ErrInvalidRel, len(nullableColumns), nfields)
}

columnTypes, err := inferColumnTypesFromGoTypes(b, tuples, fieldNames, nullableColumns)
if err != nil {
return nil, err
}

if err := validateColumnTypesFromGoTypes(b, tuples, fieldNames, nullableColumns, columnTypes); err != nil {
return nil, err
}

values, err := convertGoTuplesToExpressions(b, tuples, fieldNames, columnTypes)
if err != nil {
return nil, err
}

baseSchema := types.NamedStruct{
Names: fieldNames,
Struct: types.StructType{
Nullability: types.NullabilityRequired,
Types: columnTypes,
},
}

return &VirtualTableReadRel{
baseReadRel: baseReadRel{
RelCommon: RelCommon{},
baseSchema: baseSchema,
},
values: values,
}, nil
}

func (b *builder) goTypeToSubstraitType(val any, nullable bool) (types.Type, error) {
nullability := types.NullabilityRequired
if nullable {
nullability = types.NullabilityNullable
}

switch val.(type) {
case bool:
return &types.BooleanType{Nullability: nullability}, nil
case int8:
return &types.Int8Type{Nullability: nullability}, nil
case int16:
return &types.Int16Type{Nullability: nullability}, nil
case int32:
return &types.Int32Type{Nullability: nullability}, nil
case int:
return &types.Int64Type{Nullability: nullability}, nil
case int64:
return &types.Int64Type{Nullability: nullability}, nil
case float32:
return &types.Float32Type{Nullability: nullability}, nil
case float64:
return &types.Float64Type{Nullability: nullability}, nil
case string:
return &types.StringType{Nullability: nullability}, nil
default:
return nil, fmt.Errorf("unsupported Go type: %T", val)
}
}

func (b *builder) goValueToExpression(val any, expectedType types.Type) (expr.Expression, error) {
actualType, err := b.goTypeToSubstraitType(val, false)
if err != nil {
return nil, err
}

// Compare base types (ignore nullability for this check)
actualBase := actualType.WithNullability(types.NullabilityRequired)
expectedBase := expectedType.WithNullability(types.NullabilityRequired)

if !actualBase.Equals(expectedBase) {
return nil, fmt.Errorf("type mismatch: got %T, expected type compatible with %s", val, expectedType)
}

switch v := val.(type) {
case bool:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case int8:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case int16:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case int32:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case int:
return expr.NewPrimitiveLiteral(int64(v), expectedType.GetNullability() == types.NullabilityNullable), nil
case int64:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case float32:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case float64:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
case string:
return expr.NewPrimitiveLiteral(v, expectedType.GetNullability() == types.NullabilityNullable), nil
default:
return nil, fmt.Errorf("unsupported value type: %T", val)
}
}

// inferColumnTypesFromGoTypes infers Substrait types from the first non-null value in each column
func inferColumnTypesFromGoTypes(b *builder, tuples [][]any, fieldNames []string, nullableColumns []bool) ([]types.Type, error) {
nfields := len(fieldNames)
columnTypes := make([]types.Type, nfields)

for colIdx := range nfields {
var foundType types.Type
for rowIdx := range len(tuples) {
val := tuples[rowIdx][colIdx]
if val != nil {
var err error
foundType, err = b.goTypeToSubstraitType(val, nullableColumns[colIdx])
if err != nil {
return nil, fmt.Errorf("failed to infer type for column %d (%s): %w", colIdx, fieldNames[colIdx], err)
}
break
}
}

if foundType == nil {
return nil, fmt.Errorf("%w: column %d (%s) contains only null values, cannot infer type", substraitgo.ErrInvalidRel, colIdx, fieldNames[colIdx])
}

columnTypes[colIdx] = foundType
}
return columnTypes, nil
}

// Validate that the values in each column of every row conform to the type specified in columnTypes
func validateColumnTypesFromGoTypes(b *builder, tuples [][]any, fieldNames []string, nullableColumns []bool, columnTypes []types.Type) error {
nfields := len(fieldNames)

for colIdx := range nfields {
expectedType := columnTypes[colIdx]

for rowIdx := range len(tuples) {
val := tuples[rowIdx][colIdx]
if val != nil {
currentType, err := b.goTypeToSubstraitType(val, nullableColumns[colIdx])
if err != nil {
return fmt.Errorf("invalid type in row %d, col %d (%s): %w", rowIdx, colIdx, fieldNames[colIdx], err)
}

// Compare base types (ignore nullability for this check)
expectedBase := expectedType.WithNullability(types.NullabilityRequired)
currentBase := currentType.WithNullability(types.NullabilityRequired)

if !expectedBase.Equals(currentBase) {
return fmt.Errorf("%w: type mismatch in column %d (%s): found %T in row %d, expected type compatible with %s",
substraitgo.ErrInvalidRel, colIdx, fieldNames[colIdx], val, rowIdx, expectedType)
}
}
}
}
return nil
}

func convertGoTuplesToExpressions(b *builder, tuples [][]any, fieldNames []string, columnTypes []types.Type) ([]expr.VirtualTableExpressionValue, error) {
nfields := len(fieldNames)
values := make([]expr.VirtualTableExpressionValue, len(tuples))

for rowIdx, tuple := range tuples {
row := make(expr.VirtualTableExpressionValue, nfields)

for colIdx, val := range tuple {
expectedType := columnTypes[colIdx]

if val == nil {
row[colIdx] = expr.NewNullLiteral(expectedType)
} else {
exprVal, err := b.goValueToExpression(val, expectedType)
if err != nil {
return nil, fmt.Errorf("failed to convert value at row %d, col %d (%s): %w", rowIdx, colIdx, fieldNames[colIdx], err)
}
row[colIdx] = exprVal
}
}

values[rowIdx] = row
}

return values, nil
}

func (b *builder) IcebergTableFromMetadataFile(metadataURI string, snapshot IcebergSnapshot, schema types.NamedStruct) (*IcebergTableReadRel, error) {
tableType := &Direct{}
tableType.MetadataUri = metadataURI
Expand Down
Loading