Skip to content

Error filtering an array of struct column #11798

@mwiebusch78

Description

@mwiebusch78

What happened?

I am trying to use the filter method on a column with type 'array of struct', but I am not able to access the fields of the struct. Here is a minimal example:

import ibis
from ibis import _
ibis.set_backend('polars')  # also fails with BigQuery backend

t = ibis.memtable(
    {
        "id": [1, 2],
        "arr": [
            [
                {'x': 1, 'y': 2},
                {'x': 3, 'y': 4},
            ],
            [
                {'x': 10, 'y': 20},
                {'x': 30, 'y': 40},
            ],
        ],
    }
)

t.select(_['arr'].filter(_['x'] == 1))
# This fails with
# IbisTypeError: Column 'x' is not found in table. Existing columns: 'id', 'arr'.
# Looks like it doesn't realise that the underscore inside `filter` refers to the array elements, not the table.

t.select(_['arr'].filter(lambda r: r['x'] == 1))
# This fails with 'Translation to backend failed'.

What version of ibis are you using?

10.8.0

What backend(s) are you using, if any?

polars, BigQuery. Issue seems to be backend-independent.

Relevant log output

Traceback for the deferred method:

---------------------------------------------------------------------------
IbisTypeError                             Traceback (most recent call last)
Cell In[50], line 19
      1 t = ibis.memtable(
      2     {
      3         "id": [1, 2],
   (...)     14     }
     15 )
     17 # t.select(_['arr'].filter(lambda r: r['x'] == 1))
---> 19 t.select(_['arr'].filter(_['x'] == 1))

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/types/relations.py:2553, in Table.select(self, *exprs, **named_exprs)
   2549 # note that if changes are made to implementation of select,
   2550 # corresponding changes may be needed in `.mutate()`
   2551 from ibis.expr.rewrites import rewrite_project_input
-> 2553 values = self.bind(*exprs, **named_exprs)
   2554 values = unwrap_aliases(values)
   2555 if not values:

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/types/relations.py:661, in Table.bind(self, *args, **kwargs)
    642 def bind(self, *args: Any, **kwargs: Any) -> tuple[Value, ...]:
    643     """Bind column values to a table expression.
    644 
    645     This method handles the binding of every kind of column-like value that
   (...)    659         A tuple of bound values
    660     """
--> 661     values = self._fast_bind(*args, **kwargs)
    662     # dereference the values to `self`
    663     dm = DerefMap.from_targets(self.op())

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/types/relations.py:628, in Table._fast_bind(self, *args, **kwargs)
    626 values = []
    627 for arg in args:
--> 628     values.extend(bind(self, arg))
    630 # bind keyword arguments where each entry can produce only one value
    631 # which is then named with the given key
    632 for key, arg in kwargs.items():

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/types/relations.py:472, in bind(table, value)
    470         yield ops.Field(value, name).to_expr()
    471 elif isinstance(value, Deferred):
--> 472     yield value.resolve(table)
    473 elif isinstance(value, Resolver):
    474     yield value.resolve({"_": table})

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/deferred.py:93, in Deferred.resolve(self, _, **kwargs)
     91 def resolve(self, _=None, **kwargs):
     92     context = {"_": _, **kwargs}
---> 93     return self._resolver.resolve(context)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/deferred.py:411, in Call.resolve(self, context)
    409 def resolve(self, context):
    410     func = self.func.resolve(context)
--> 411     args = tuple(arg.resolve(context) for arg in self.args)
    412     kwargs = {k: v.resolve(context) for k, v in self.kwargs.items()}
    413     return func(*args, **kwargs)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/deferred.py:411, in <genexpr>(.0)
    409 def resolve(self, context):
    410     func = self.func.resolve(context)
--> 411     args = tuple(arg.resolve(context) for arg in self.args)
    412     kwargs = {k: v.resolve(context) for k, v in self.kwargs.items()}
    413     return func(*args, **kwargs)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/deferred.py:487, in BinaryOperator.resolve(self, context)
    486 def resolve(self, context):
--> 487     left = self.left.resolve(context)
    488     right = self.right.resolve(context)
    489     return self.func(left, right)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/deferred.py:372, in Item.resolve(self, context)
    370 obj = self.obj.resolve(context)
    371 idx = self.indexer.resolve(context)
--> 372 return obj[idx]

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/types/relations.py:1056, in Table.__getitem__(self, what)
   1053 from ibis.expr.types.logical import BooleanValue
   1055 if isinstance(what, str):
-> 1056     return ops.Field(self.op(), what).to_expr()
   1057 elif isinstance(what, int):
   1058     return ops.Field(self.op(), self.columns[what]).to_expr()

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/bases.py:72, in AbstractMeta.__call__(cls, *args, **kwargs)
     52 def __call__(cls, *args, **kwargs):
     53     """Create a new instance of the class.
     54 
     55     The subclass may override the `__create__` classmethod to change the
   (...)     70 
     71     """
---> 72     return cls.__create__(*args, **kwargs)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/common/grounds.py:120, in Annotable.__create__(cls, *args, **kwargs)
    116 @classmethod
    117 def __create__(cls, *args: Any, **kwargs: Any) -> Self:
    118     # construct the instance by passing only validated keyword arguments
    119     kwargs = cls.__signature__.validate(cls, args, kwargs)
--> 120     return super().__create__(**kwargs)

File ~/.pyenv/versions/adhoc/lib/python3.12/site-packages/ibis/expr/operations/relations.py:96, in Field.__init__(self, rel, name)
     94 if name not in rel.schema:
     95     columns_formatted = ", ".join(map(repr, rel.schema.names))
---> 96     raise IbisTypeError(
     97         f"Column {name!r} is not found in table. "
     98         f"Existing columns: {columns_formatted}."
     99     )
    100 super().__init__(rel=rel, name=name)

IbisTypeError: Column 'x' is not found in table. Existing columns: 'id', 'arr'.


Output from the call using a lambda function:

Translation to backend failed
Error message: OperationNotDefinedError("No translation rule for <class 'ibis.expr.operations.arrays.ArrayFilter'>")
Expression repr follows:
r0 := InMemoryTable
  data:
    PandasDataFrameProxy:
         id                                       arr
      0   1      [{'x': 1, 'y': 2}, {'x': 3, 'y': 4}]
      1   2  [{'x': 10, 'y': 20}, {'x': 30, 'y': 40}]

Project[r0]
  ArrayFilter(arr, Equals(x, 1), r): ArrayFilter(r0.arr, body=StructField(Argument(name='r', shape=<ibis.expr.datashape.Columnar object at 0xffff6006ad80>, dtype={'x': int64, 'y': int64}), field='x') == 1, param=Argument(name='r', shape=<ibis.expr.datashape.Columnar object at 0xffff6006ad80>, dtype={'x': int64, 'y': int64}))

Code of Conduct

  • I agree to follow this project's Code of Conduct

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Incorrect behavior inside of ibis

    Type

    No type

    Projects

    Status

    backlog

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions