Description
Is your feature request related to a problem?
This was discussed over on Zulip:
One of my tables in BigQuery receives a payload in JSON format from Pub/Sub, and it's stored in BigQuery with the JSON type, but I'm unable to use map operations (such as `map.keys()`) on it in Ibis; they fail with the following error:

`OperationNotDefinedError: No translation rule for <class 'ibis.expr.operations.maps.MapKeys'>`
```
In [1]: import ibis
In [2]: con = ibis.bigquery.connect(project_id="<my project>", dataset_id="devDS")
In [3]: t = con.table("events_dev_cut")
In [4]: t.columns
Out[4]: ['ID', 'BEEID', 'publishedTime', 'payload', 'ingestionTime', 'p_daily']
In [5]: t
Out[5]:
DatabaseTable: events_dev_cut
ID string
BEEID string
publishedTime timestamp('UTC')
payload json
ingestionTime timestamp('UTC')
p_daily date
In [6]: t.payload.map.keys()
Out[6]:
r0 := DatabaseTable: events_dev_cut
ID string
BEEID string
publishedTime timestamp('UTC')
payload json
ingestionTime timestamp('UTC')
p_daily date
MapKeys(ToJSONMap(payload)): MapKeys(ToJSONMap(r0.payload))
In [7]: t.payload.map.keys().execute()
---------------------------------------------------------------------------
OperationNotDefinedError Traceback (most recent call last)
Cell In[7], line 1
----> 1 t.payload.map.keys().execute()
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/expr/types/core.py:324, in Expr.execute(self, limit, timecontext, params, **kwargs)
297 def execute(
298 self,
299 limit: int | str | None = "default",
(...)
302 **kwargs: Any,
303 ):
304 """Execute an expression against its backend if one exists.
305
306 Parameters
(...)
322 Keyword arguments
323 """
--> 324 return self._find_backend(use_default=True).execute(
325 self, limit=limit, timecontext=timecontext, params=params, **kwargs
326 )
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/bigquery/__init__.py:694, in Backend.execute(self, expr, params, limit, **kwargs)
692 kwargs.pop("timecontext", None)
693 self._register_in_memory_tables(expr)
--> 694 sql = self.compile(expr, limit=limit, params=params, **kwargs)
695 self._log(sql)
696 cursor = self.raw_sql(sql, params=params, **kwargs)
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/bigquery/__init__.py:617, in Backend.compile(self, expr, limit, params, **_)
615 self._make_session()
616 self._define_udf_translation_rules(expr)
--> 617 sql = self.compiler.to_ast_ensure_limit(expr, limit, params=params).compile()
619 return ";\n\n".join(
620 # convert unnest function calls to explode
621 query.transform(_anonymous_unnest_to_explode)
(...)
631 for query in sg.parse(sql, read=self.name)
632 )
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/base.py:37, in QueryAST.compile(self)
35 def compile(self):
36 compiled_setup_queries = [q.compile() for q in self.setup_queries]
---> 37 compiled_queries = [q.compile() for q in self.queries]
38 compiled_teardown_queries = [q.compile() for q in self.teardown_queries]
39 return self.context.collapse(
40 list(
41 chain(
(...)
46 )
47 )
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/base.py:37, in <listcomp>(.0)
35 def compile(self):
36 compiled_setup_queries = [q.compile() for q in self.setup_queries]
---> 37 compiled_queries = [q.compile() for q in self.queries]
38 compiled_teardown_queries = [q.compile() for q in self.teardown_queries]
39 return self.context.collapse(
40 list(
41 chain(
(...)
46 )
47 )
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:269, in Select.compile(self)
266 with_frag = self.format_subqueries()
268 # SELECT
--> 269 select_frag = self.format_select_set()
271 # FROM, JOIN, UNION
272 from_frag = self.format_table_set()
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:324, in Select.format_select_set(self)
322 for node in self.select_set:
323 if isinstance(node, ops.Value):
--> 324 expr_str = self._translate(node, named=True, permit_subquery=True)
325 elif isinstance(node, ops.TableNode):
326 alias = context.get_ref(node)
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:239, in Select._translate(self, expr, named, permit_subquery, within_where)
231 def _translate(self, expr, named=False, permit_subquery=False, within_where=False):
232 translator = self.translator_class(
233 expr,
234 context=self.context,
(...)
237 within_where=within_where,
238 )
--> 239 return translator.get_result()
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:224, in ExprTranslator.get_result(self)
222 def get_result(self):
223 """Compile SQL expression into a string."""
--> 224 translated = self.translate(self.node)
225 if self._needs_name(self.node):
226 # TODO: this could fail in various ways
227 translated = self.name(translated, self.node.name)
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:256, in ExprTranslator.translate(self, op)
254 elif type(op) in self._registry:
255 formatter = self._registry[type(op)]
--> 256 return formatter(self, op)
257 else:
258 raise com.OperationNotDefinedError(f"No translation rule for {type(op)}")
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/registry/main.py:23, in alias(translator, op)
20 def alias(translator, op):
21 # just compile the underlying argument because the naming is handled
22 # by the translator for the top level expression
---> 23 return translator.translate(op.arg)
File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:258, in ExprTranslator.translate(self, op)
256 return formatter(self, op)
257 else:
--> 258 raise com.OperationNotDefinedError(f"No translation rule for {type(op)}")
OperationNotDefinedError: No translation rule for <class 'ibis.expr.operations.maps.MapKeys'>
```
Describe the solution you'd like
Ideally, BigQuery JSON columns would be treated the same way they are in DuckDB. That would let Ibis generate SQL which can then be executed directly in BigQuery, or automated with tools like Dataform. For comparison, a sketch of the working DuckDB behavior follows.
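Here's a minimal sketch of the DuckDB behavior I'd like BigQuery to match (ibis 8.0.0). The one-row table and its `payload` column are made up for illustration; the expression is the same `map.keys()` call from the repro above:

```python
import ibis

con = ibis.duckdb.connect()  # in-memory DuckDB

# A one-row table whose payload column has DuckDB's JSON type.
t = con.sql("""SELECT '{"a": 1, "b": 2}'::JSON AS payload""")

expr = t.payload.map.keys()  # the same expression that fails on BigQuery
print(ibis.to_sql(expr))     # DuckDB has a translation rule, so this compiles
expr.execute()               # e.g. a column containing ['a', 'b']
```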
My current workaround is to extract from the BigQuery table into PyArrow, load that into DuckDB, and cast the column back to the JSON type in DuckDB (sketched below). This is okay for local development and exploratory data analysis, but it isn't practical for applying those transformations to the source data, since it adds dependencies, e.g. a local environment or a container from which to deploy.
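Roughly, reusing the connection details from the repro above (the intermediate table name and the `"json"` cast target are my choices and may need adjusting):

```python
import ibis

# 1. Pull the table out of BigQuery into a PyArrow table.
bq = ibis.bigquery.connect(project_id="<my project>", dataset_id="devDS")
arrow_table = bq.table("events_dev_cut").to_pyarrow()

# 2. Load the Arrow table into an in-memory DuckDB instance.
ddb = ibis.duckdb.connect()
events = ddb.create_table("events_dev_cut", arrow_table)

# 3. The JSON column comes across as a plain string, so cast it back to
#    JSON; after that, map operations translate and execute on DuckDB.
events = events.mutate(payload=events.payload.cast("json"))
events.payload.map.keys().execute()
```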
What version of ibis are you running?
8.0.0
What backend(s) are you using, if any?
BigQuery, DuckDB
Code of Conduct
- [x] I agree to follow this project's Code of Conduct