Skip to content

feat: Improved BigQuery JSON Type Support #8794

Open
@dixdotdev

Description

@dixdotdev

Is your feature request related to a problem?

This was discussed over on Zulip:

One of my tables in BigQuery receives a payload in JSON format from Pub/Sub, and it's stored in BigQuery with the JSON type, but I'm unable to perform map() functions on it in Ibis with the following error:
OperationNotDefinedError: No translation rule for <class 'ibis.expr.operations.maps.MapKeys'>

In [1]: import ibis

In [2]: con = ibis.bigquery.connect(project_id="<my project>", dataset_id="devDS")

In [3]: t = con.table("events_dev_cut")

In [4]: t.columns
Out[4]: ['ID', 'BEEID', 'publishedTime', 'payload', 'ingestionTime', 'p_daily']

In [5]: t
Out[5]:
DatabaseTable: events_dev_cut
  ID            string
  BEEID         string
  publishedTime timestamp('UTC')
  payload       json
  ingestionTime timestamp('UTC')
  p_daily       date

In [6]: t.payload.map.keys()
Out[6]:
r0 := DatabaseTable: events_dev_cut
  ID            string
  BEEID         string
  publishedTime timestamp('UTC')
  payload       json
  ingestionTime timestamp('UTC')
  p_daily       date

MapKeys(ToJSONMap(payload)): MapKeys(ToJSONMap(r0.payload))

In [7]: t.payload.map.keys().execute()
---------------------------------------------------------------------------
OperationNotDefinedError                  Traceback (most recent call last)
Cell In[7], line 1
----> 1 t.payload.map.keys().execute()

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/expr/types/core.py:324, in Expr.execute(self, limit, timecontext, params, **kwargs)
    297 def execute(
    298     self,
    299     limit: int | str | None = "default",
   (...)
    302     **kwargs: Any,
    303 ):
    304     """Execute an expression against its backend if one exists.
    305 
    306     Parameters
   (...)
    322         Keyword arguments
    323     """
--> 324     return self._find_backend(use_default=True).execute(
    325         self, limit=limit, timecontext=timecontext, params=params, **kwargs
    326     )

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/bigquery/__init__.py:694, in Backend.execute(self, expr, params, limit, **kwargs)
    692 kwargs.pop("timecontext", None)
    693 self._register_in_memory_tables(expr)
--> 694 sql = self.compile(expr, limit=limit, params=params, **kwargs)
    695 self._log(sql)
    696 cursor = self.raw_sql(sql, params=params, **kwargs)

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/bigquery/__init__.py:617, in Backend.compile(self, expr, limit, params, **_)
    615 self._make_session()
    616 self._define_udf_translation_rules(expr)
--> 617 sql = self.compiler.to_ast_ensure_limit(expr, limit, params=params).compile()
    619 return ";\n\n".join(
    620     # convert unnest function calls to explode
    621     query.transform(_anonymous_unnest_to_explode)
   (...)
    631     for query in sg.parse(sql, read=self.name)
    632 )

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/base.py:37, in QueryAST.compile(self)
     35 def compile(self):
     36     compiled_setup_queries = [q.compile() for q in self.setup_queries]
---> 37     compiled_queries = [q.compile() for q in self.queries]
     38     compiled_teardown_queries = [q.compile() for q in self.teardown_queries]
     39     return self.context.collapse(
     40         list(
     41             chain(
   (...)
     46         )
     47     )

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/base.py:37, in <listcomp>(.0)
     35 def compile(self):
     36     compiled_setup_queries = [q.compile() for q in self.setup_queries]
---> 37     compiled_queries = [q.compile() for q in self.queries]
     38     compiled_teardown_queries = [q.compile() for q in self.teardown_queries]
     39     return self.context.collapse(
     40         list(
     41             chain(
   (...)
     46         )
     47     )

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:269, in Select.compile(self)
    266 with_frag = self.format_subqueries()
    268 # SELECT
--> 269 select_frag = self.format_select_set()
    271 # FROM, JOIN, UNION
    272 from_frag = self.format_table_set()

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:324, in Select.format_select_set(self)
    322 for node in self.select_set:
    323     if isinstance(node, ops.Value):
--> 324         expr_str = self._translate(node, named=True, permit_subquery=True)
    325     elif isinstance(node, ops.TableNode):
    326         alias = context.get_ref(node)

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/query_builder.py:239, in Select._translate(self, expr, named, permit_subquery, within_where)
    231 def _translate(self, expr, named=False, permit_subquery=False, within_where=False):
    232     translator = self.translator_class(
    233         expr,
    234         context=self.context,
   (...)
    237         within_where=within_where,
    238     )
--> 239     return translator.get_result()

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:224, in ExprTranslator.get_result(self)
    222 def get_result(self):
    223     """Compile SQL expression into a string."""
--> 224     translated = self.translate(self.node)
    225     if self._needs_name(self.node):
    226         # TODO: this could fail in various ways
    227         translated = self.name(translated, self.node.name)

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:256, in ExprTranslator.translate(self, op)
    254 elif type(op) in self._registry:
    255     formatter = self._registry[type(op)]
--> 256     return formatter(self, op)
    257 else:
    258     raise com.OperationNotDefinedError(f"No translation rule for {type(op)}")

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/registry/main.py:23, in alias(translator, op)
     20 def alias(translator, op):
     21     # just compile the underlying argument because the naming is handled
     22     # by the translator for the top level expression
---> 23     return translator.translate(op.arg)

File ~/src/etlcore/.venv/lib/python3.11/site-packages/ibis/backends/base/sql/compiler/translator.py:258, in ExprTranslator.translate(self, op)
    256     return formatter(self, op)
    257 else:
--> 258     raise com.OperationNotDefinedError(f"No translation rule for {type(op)}")

OperationNotDefinedError: No translation rule for <class 'ibis.expr.operations.maps.MapKeys'>

Describe the solution you'd like

Ideally BigQuery JSON type columns would be treated in the same way as they are in DuckDB. This should allow for SQL generation which can then be executed directly in BigQuery, or automated with tools like Dataform.

I have a work around which is to extract from my BigQuery table, then move that into PyArrow, then into DuckDB and finally casting the column in DuckDB back to JSON type. This is okay for local development and exploratory data analysis but it isn't as practical for applying those transformations on to the source data without dependencies eg a local environment, or a container from which to deploy from.

What version of ibis are you running?

8.0.0

What backend(s) are you using, if any?

BigQuery, DuckDB

Code of Conduct

  • I agree to follow this project's Code of Conduct

Metadata

Metadata

Assignees

No one assigned

    Labels

    bigqueryThe BigQuery backendfeatureFeatures or general enhancements

    Type

    No type

    Projects

    Status

    backlog

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions