From d028c5507def69a332fd1aa66fa232af67013f84 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 15:27:54 -0400 Subject: [PATCH 1/8] stub db scan --- polars_to_ibis/_scan.py | 9 +++++++++ tests/test_scan.py | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 polars_to_ibis/_scan.py create mode 100644 tests/test_scan.py diff --git a/polars_to_ibis/_scan.py b/polars_to_ibis/_scan.py new file mode 100644 index 0000000..c777dc4 --- /dev/null +++ b/polars_to_ibis/_scan.py @@ -0,0 +1,9 @@ +import ibis +import polars as pl + + +def scan_database(ibis_backend_name: str, table_name: str, **connect_kwargs: str): + connection = getattr(ibis, ibis_backend_name).connect(**connect_kwargs) + schema = connection.get_schema(table_name) + + pl.LazyFrame(schema=schema) diff --git a/tests/test_scan.py b/tests/test_scan.py new file mode 100644 index 0000000..788db21 --- /dev/null +++ b/tests/test_scan.py @@ -0,0 +1,23 @@ +import ibis +import polars as pl + +from polars_to_ibis._scan import scan_database + + +def test_scan_database(): + backend = "sqlite" + table_name = "default_table" + + # Used to populate DB, not downstream. + connection = getattr(ibis, backend).connect() # type: ignore + input_df = pl.DataFrame( + { + "ints": [1, 2, 3, 4], + "floats": [0.1, 0.2, 0.3, 0.4], + } + ) + + connection.create_table(table_name, input_df, overwrite=True) # type: ignore + + # TODO: Fix ibis.common.exceptions.TableNotFound + scan_database(backend, table_name) From d136595ee20e47e661ad0130f85855f4d1382ca8 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 16:34:58 -0400 Subject: [PATCH 2/8] scan_database no error --- polars_to_ibis/_scan.py | 11 +++++------ tests/test_scan.py | 4 +--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/polars_to_ibis/_scan.py b/polars_to_ibis/_scan.py index c777dc4..38c194e 100644 --- a/polars_to_ibis/_scan.py +++ b/polars_to_ibis/_scan.py @@ -1,9 +1,8 @@ -import ibis -import polars as pl +from typing import Any +import polars as pl -def scan_database(ibis_backend_name: str, table_name: str, **connect_kwargs: str): - connection = getattr(ibis, ibis_backend_name).connect(**connect_kwargs) - schema = connection.get_schema(table_name) - pl.LazyFrame(schema=schema) +def scan_database(connection: Any, table_name: str): + ibis_schema = connection.get_schema(table_name) + return pl.LazyFrame(schema=ibis_schema.to_polars()) diff --git a/tests/test_scan.py b/tests/test_scan.py index 788db21..be2bfc8 100644 --- a/tests/test_scan.py +++ b/tests/test_scan.py @@ -18,6 +18,4 @@ def test_scan_database(): ) connection.create_table(table_name, input_df, overwrite=True) # type: ignore - - # TODO: Fix ibis.common.exceptions.TableNotFound - scan_database(backend, table_name) + scan_database(connection, table_name) From 148f888f41ec1fefec4fa93ed00740dc65a298ad Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:05:31 -0400 Subject: [PATCH 3/8] add scan_database at the top level --- polars_to_ibis/__init__.py | 5 +++++ polars_to_ibis/_scan.py | 8 -------- tests/test_parser.py | 29 +++++++++++++++++------------ tests/test_scan.py | 21 --------------------- 4 files changed, 22 insertions(+), 41 deletions(-) delete mode 100644 polars_to_ibis/_scan.py delete mode 100644 tests/test_scan.py diff --git a/polars_to_ibis/__init__.py b/polars_to_ibis/__init__.py index 3671470..3f8df4e 100644 --- a/polars_to_ibis/__init__.py +++ b/polars_to_ibis/__init__.py @@ -41,6 +41,11 @@ def _check_version(): ) +def scan_database(connection: Any, table_name: str): + ibis_schema = connection.get_schema(table_name) + return pl.LazyFrame(schema=ibis_schema.to_polars()) + + def convert_polars_to_ibis(lf: pl.LazyFrame, table_name: str) -> ibis.Table: """ Convert a Polars LazyFrame to an Ibis unbound table. diff --git a/polars_to_ibis/_scan.py b/polars_to_ibis/_scan.py deleted file mode 100644 index 38c194e..0000000 --- a/polars_to_ibis/_scan.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import Any - -import polars as pl - - -def scan_database(connection: Any, table_name: str): - ibis_schema = connection.get_schema(table_name) - return pl.LazyFrame(schema=ibis_schema.to_polars()) diff --git a/tests/test_parser.py b/tests/test_parser.py index 1fe2bbe..56cb3ca 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -6,7 +6,7 @@ import polars as pl import pytest -from polars_to_ibis import convert_polars_to_ibis +from polars_to_ibis import convert_polars_to_ibis, scan_database from polars_to_ibis._parse.table_handlers import update_polars_to_ibis from .fixtures import Fixture, fixtures, input_data @@ -41,7 +41,6 @@ def get_connection(df: pl.DataFrame, table_name: str, backend: str): # Test fixtures: backends = [ - "polars", "sqlite", "duckdb", pytest.param("postgres", marks=pytest.mark.extra_install), @@ -76,19 +75,20 @@ def assert_error_or_none( @pytest.mark.parametrize( "fixture", fixtures, ids=lambda fixture: f"{fixture.category}-{fixture.expression}" ) -@pytest.mark.parametrize("backend", backends) -@pytest.mark.parametrize("exporter_key", exporters.keys()) # type: ignore -def test_translate_table(fixture: Fixture, backend: str, exporter_key: str): - # Sanity check: Does the polars expression have the expected result? - lf = pl.LazyFrame(input_data[fixture.category]) +def test_fixture_consistency(fixture: Fixture): + # Does the polars expression have the expected result? + lf = pl.LazyFrame(input_data[fixture.category]) # noqa: F841 polars_output = eval(fixture.expression).collect().to_dict(as_series=False) - assert polars_output == fixture.expected_output, "Typo in test?" + assert polars_output == fixture.expected_output, "Typo in fixture?" - # Convert polars to ibis, but without any data: - lf = pl.LazyFrame(schema=lf.collect_schema()) - lf = eval(fixture.expression) + +@pytest.mark.parametrize( + "fixture", fixtures, ids=lambda fixture: f"{fixture.category}-{fixture.expression}" +) +@pytest.mark.parametrize("backend", backends) +@pytest.mark.parametrize("exporter_key", exporters.keys()) # type: ignore +def test_translate_table_new(fixture: Fixture, backend: str, exporter_key: str): table_name = "default_table" - ibis_table = convert_polars_to_ibis(lf, table_name) # Set up target database, with data: input_df = pl.DataFrame(input_data[fixture.category]) @@ -98,6 +98,11 @@ def test_translate_table(fixture: Fixture, backend: str, exporter_key: str): lambda: get_connection(input_df, table_name=table_name, backend=backend), ) + lf = scan_database(connection, table_name) + lf = eval(fixture.expression) + + ibis_table = convert_polars_to_ibis(lf, table_name) + # Run query on target database: export = exporters[exporter_key] # type: ignore expected_backend_error = fixture.backend_errors.get( diff --git a/tests/test_scan.py b/tests/test_scan.py deleted file mode 100644 index be2bfc8..0000000 --- a/tests/test_scan.py +++ /dev/null @@ -1,21 +0,0 @@ -import ibis -import polars as pl - -from polars_to_ibis._scan import scan_database - - -def test_scan_database(): - backend = "sqlite" - table_name = "default_table" - - # Used to populate DB, not downstream. - connection = getattr(ibis, backend).connect() # type: ignore - input_df = pl.DataFrame( - { - "ints": [1, 2, 3, 4], - "floats": [0.1, 0.2, 0.3, 0.4], - } - ) - - connection.create_table(table_name, input_df, overwrite=True) # type: ignore - scan_database(connection, table_name) From c090301793a5560959e7fcc3bf3942d16e0e5dfd Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:12:28 -0400 Subject: [PATCH 4/8] explicit globals safer? --- tests/test_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index 56cb3ca..b0dd0f3 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -77,8 +77,8 @@ def assert_error_or_none( ) def test_fixture_consistency(fixture: Fixture): # Does the polars expression have the expected result? - lf = pl.LazyFrame(input_data[fixture.category]) # noqa: F841 - polars_output = eval(fixture.expression).collect().to_dict(as_series=False) + globals = {"lf": pl.LazyFrame(input_data[fixture.category]), "pl": pl} + polars_output = eval(fixture.expression, globals).collect().to_dict(as_series=False) assert polars_output == fixture.expected_output, "Typo in fixture?" @@ -98,8 +98,8 @@ def test_translate_table_new(fixture: Fixture, backend: str, exporter_key: str): lambda: get_connection(input_df, table_name=table_name, backend=backend), ) - lf = scan_database(connection, table_name) - lf = eval(fixture.expression) + globals = {"lf": scan_database(connection, table_name), "pl": pl} + lf = eval(fixture.expression, globals) ibis_table = convert_polars_to_ibis(lf, table_name) From 337d2fcda18477924d5709762d0140015779328a Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:26:02 -0400 Subject: [PATCH 5/8] scan_database in README --- README-PYPI.md | 13 +++++++++++-- README.md | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/README-PYPI.md b/README-PYPI.md index 54bf884..148c495 100644 --- a/README-PYPI.md +++ b/README-PYPI.md @@ -6,8 +6,6 @@ Convert [Polars LazyFrames](https://docs.pola.rs/api/python/stable/reference/laz Polars and Ibis have similar APIs, but while Polars supports computation in-memory and on [Polars Cloud](https://cloud.pola.rs/), Ibis by itself does not handle computation: Instead it translates the dataframe expression into idiomatic SQL for a particular database. -The public interface of `polars_to_ibis` consists of exactly one function: `convert_polars_to_ibis`. - ## Example ```python @@ -55,6 +53,17 @@ Finally, we can execute in SQLite the query which we constructed in Polars and t ``` +In this example we somewhat artificially started with a Polars LazyFrame. +In the real world, you more likely would start with a database. +To read a database table's schema and create from that a LazyFrame, use `scan_database`: + +```python +>>> from polars_to_ibis import scan_database +>>> scan_database(connection, table_name).collect_schema() +Schema([('ints', Int64)]) + +``` + ## Limitations diff --git a/README.md b/README.md index 9eb32eb..4176634 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ Convert [Polars LazyFrames](https://docs.pola.rs/api/python/stable/reference/laz Polars and Ibis have similar APIs, but while Polars supports computation in-memory and on [Polars Cloud](https://cloud.pola.rs/), Ibis by itself does not handle computation: Instead it translates the dataframe expression into idiomatic SQL for a particular database. -The public interface of `polars_to_ibis` consists of exactly one function: `convert_polars_to_ibis`. - ## Example ```python @@ -55,6 +53,17 @@ Finally, we can execute in SQLite the query which we constructed in Polars and t ``` +In this example we somewhat artificially started with a Polars LazyFrame. +In the real world, you more likely would start with a database. +To read a database table's schema and create from that a LazyFrame, use `scan_database`: + +```python +>>> from polars_to_ibis import scan_database +>>> scan_database(connection, table_name).collect_schema() +Schema([('ints', Int64)]) + +``` + ## Limitations From 032062b7acdc06e0959910b8e81e4ef3c26abdd7 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:27:24 -0400 Subject: [PATCH 6/8] docstring --- polars_to_ibis/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polars_to_ibis/__init__.py b/polars_to_ibis/__init__.py index 3f8df4e..ff775fc 100644 --- a/polars_to_ibis/__init__.py +++ b/polars_to_ibis/__init__.py @@ -42,6 +42,9 @@ def _check_version(): def scan_database(connection: Any, table_name: str): + """ + Get the schema from a database table and convert it to Polars. + """ ibis_schema = connection.get_schema(table_name) return pl.LazyFrame(schema=ibis_schema.to_polars()) From 2c2e9eceb363bf9ebb374952ffd65ce9b4fa0680 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:32:36 -0400 Subject: [PATCH 7/8] explain coverage gap --- tests/test_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_parser.py b/tests/test_parser.py index b0dd0f3..1c59a2a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -41,6 +41,8 @@ def get_connection(df: pl.DataFrame, table_name: str, backend: str): # Test fixtures: backends = [ + # Polars could be tested, but there's an error getting the schema, + # and since it's not a realistic target for us, drop it from coverage. "sqlite", "duckdb", pytest.param("postgres", marks=pytest.mark.extra_install), From 3c272fbe07872cf219ad8d0df75990504b0412b9 Mon Sep 17 00:00:00 2001 From: Chuck McCallum Date: Fri, 12 Jun 2026 17:37:34 -0400 Subject: [PATCH 8/8] changes in repr --- README-PYPI.md | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README-PYPI.md b/README-PYPI.md index 148c495..09d28f4 100644 --- a/README-PYPI.md +++ b/README-PYPI.md @@ -59,8 +59,8 @@ To read a database table's schema and create from that a LazyFrame, use `scan_da ```python >>> from polars_to_ibis import scan_database ->>> scan_database(connection, table_name).collect_schema() -Schema([('ints', Int64)]) +>>> dict(scan_database(connection, table_name).collect_schema()) +{'ints': Int64} ``` diff --git a/README.md b/README.md index 4176634..d7ed938 100644 --- a/README.md +++ b/README.md @@ -59,8 +59,8 @@ To read a database table's schema and create from that a LazyFrame, use `scan_da ```python >>> from polars_to_ibis import scan_database ->>> scan_database(connection, table_name).collect_schema() -Schema([('ints', Int64)]) +>>> dict(scan_database(connection, table_name).collect_schema()) +{'ints': Int64} ```