diff --git a/README-PYPI.md b/README-PYPI.md index 54bf884..09d28f4 100644 --- a/README-PYPI.md +++ b/README-PYPI.md @@ -6,8 +6,6 @@ Convert [Polars LazyFrames](https://docs.pola.rs/api/python/stable/reference/laz Polars and Ibis have similar APIs, but while Polars supports computation in-memory and on [Polars Cloud](https://cloud.pola.rs/), Ibis by itself does not handle computation: Instead it translates the dataframe expression into idiomatic SQL for a particular database. -The public interface of `polars_to_ibis` consists of exactly one function: `convert_polars_to_ibis`. - ## Example ```python @@ -55,6 +53,17 @@ Finally, we can execute in SQLite the query which we constructed in Polars and t ``` +In this example we somewhat artificially started with a Polars LazyFrame. +In the real world, you would more likely start with a database. +To read a database table's schema and create from that a LazyFrame, use `scan_database`: + +```python +>>> from polars_to_ibis import scan_database +>>> dict(scan_database(connection, table_name).collect_schema()) +{'ints': Int64} + +``` + ## Limitations diff --git a/README.md b/README.md index 9eb32eb..d7ed938 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ Convert [Polars LazyFrames](https://docs.pola.rs/api/python/stable/reference/laz Polars and Ibis have similar APIs, but while Polars supports computation in-memory and on [Polars Cloud](https://cloud.pola.rs/), Ibis by itself does not handle computation: Instead it translates the dataframe expression into idiomatic SQL for a particular database. -The public interface of `polars_to_ibis` consists of exactly one function: `convert_polars_to_ibis`. - ## Example ```python @@ -55,6 +53,17 @@ Finally, we can execute in SQLite the query which we constructed in Polars and t ``` +In this example we somewhat artificially started with a Polars LazyFrame. +In the real world, you would more likely start with a database.
def scan_database(connection: Any, table_name: str) -> pl.LazyFrame:
    """
    Create a Polars LazyFrame whose schema mirrors a database table.

    Only the table's schema is looked up; the LazyFrame is constructed from
    that schema alone and is given no rows.

    Parameters
    ----------
    connection
        Object exposing ``get_schema(table_name)`` whose result provides
        ``to_polars()`` (presumably an Ibis backend connection -- the tests
        exercise sqlite, duckdb and postgres).
    table_name
        Name of the table whose schema should be read.

    Returns
    -------
    pl.LazyFrame
        A LazyFrame carrying the table's column names and Polars dtypes.

    Notes
    -----
    Any exception raised by ``connection.get_schema`` propagates unchanged.
    The test-suite comments report that the schema lookup currently fails
    for the Ibis Polars backend.
    """
    ibis_schema = connection.get_schema(table_name)
    # Translate backend column types into Polars dtypes before building
    # the schema-only frame.
    return pl.LazyFrame(schema=ibis_schema.to_polars())
@pytest.mark.parametrize(
    "fixture", fixtures, ids=lambda fixture: f"{fixture.category}-{fixture.expression}"
)
def test_fixture_consistency(fixture: Fixture) -> None:
    """
    Check that a fixture's Polars expression produces its expected output.

    The expression string is evaluated against a LazyFrame built from the
    input data for the fixture's category, collected, and compared with
    ``fixture.expected_output``. A failure here points at the fixture itself
    rather than at the Polars-to-Ibis translation.
    """
    # The expression strings come from the fixtures module imported by this
    # test file, so eval() is not being fed external input.
    # The mapping is called ``namespace`` (not ``globals``) so the builtin
    # ``globals`` is not shadowed inside the test.
    namespace = {"lf": pl.LazyFrame(input_data[fixture.category]), "pl": pl}
    lazy_result = eval(fixture.expression, namespace)
    polars_output = lazy_result.collect().to_dict(as_series=False)
    assert polars_output == fixture.expected_output, "Typo in fixture?"