Draft
Changes from all commits
46 commits
24fa83b
Replace SQLAlchemy with ibis-framework for all database operations
daniel-thom Apr 8, 2026
26393c7
Fix review issues: cleanup safety, column ordering, schema uniqueness
daniel-thom Apr 9, 2026
6584bd4
Address PR comments
daniel-thom Apr 9, 2026
0eede6f
Fix spark functionality
daniel-thom Apr 10, 2026
afd1a8f
Code cleanup
daniel-thom Apr 10, 2026
0e71e9a
Code cleanup
daniel-thom Apr 10, 2026
cc44ef3
Code cleanup
daniel-thom Apr 10, 2026
177ca7e
Code cleanup
daniel-thom Apr 10, 2026
a6f710b
Fix CI
daniel-thom Apr 10, 2026
184317b
Address PR comments
daniel-thom Apr 10, 2026
874ebd0
Add test coverage
daniel-thom Apr 10, 2026
7ca977b
Fix dispose in owned session
daniel-thom Apr 10, 2026
650a506
Fix tests
daniel-thom Apr 11, 2026
b4f2d6a
Spark fixes
daniel-thom Apr 11, 2026
bfc2684
Fix CI failures
daniel-thom Apr 11, 2026
ed8c2a2
Increase test coverage
daniel-thom Apr 11, 2026
472d7bc
Address PR review feedback
daniel-thom Apr 12, 2026
cdcef84
Update lab name
daniel-thom Apr 12, 2026
5ec2b2e
Spark fixes
daniel-thom Apr 12, 2026
827d849
Restore DuckDB native fetch_df path and limit cache invalidation to DDL
daniel-thom Apr 12, 2026
c2e8d06
Use transactions
daniel-thom Apr 12, 2026
63ea56a
Move per-backend I/O logic from functions.py into IbisBackend
daniel-thom Apr 12, 2026
04cffe0
Return ibis.Table from Store.read_query/read_table
daniel-thom Apr 12, 2026
652a7a2
Remove table cache and collapse pure-delegation backend methods
daniel-thom Apr 13, 2026
4cd5c4c
Fix mypy strict-mode errors in ibis backends
daniel-thom Apr 13, 2026
beaabc7
Use ibis to_parquet in backend write_parquet implementations
daniel-thom Apr 13, 2026
aaa08c3
Use Ibis natives in Store and drop chronify.duckdb helpers
daniel-thom Apr 13, 2026
42db94a
Apply schema-driven type casts to Store.read_table
daniel-thom Apr 13, 2026
fc3a69f
Delegate backend insert/view creation to Ibis natives
daniel-thom Apr 13, 2026
d43760f
Defer ibis.Table materialization through the write path
daniel-thom Apr 13, 2026
96bf822
Drop intermediate ymdh table even when apply_mapping fails
daniel-thom Apr 13, 2026
d176f59
Fix Spark test cleanup and period lookup
daniel-thom Apr 13, 2026
85423a1
Fix mypy error
daniel-thom Apr 13, 2026
cb1fe37
Restore docstrings on Store ingest and time-config methods
daniel-thom Apr 13, 2026
4eedfa2
Replace raw SQL with ibis expressions in checker and CSV reader
daniel-thom Apr 13, 2026
d7a4ce8
Fix UTC handling for Spark
daniel-thom Apr 14, 2026
280c950
Update docs
daniel-thom Apr 24, 2026
c6c7a6a
Refactor code
daniel-thom Apr 27, 2026
f7a52b8
Fix SQLite DDL atomicity and simplify transaction handling
daniel-thom Apr 27, 2026
17ca716
Make parquet promotion crash-safe
daniel-thom Apr 27, 2026
511cf7b
Make post-promotion backup cleanup non-fatal
daniel-thom Apr 27, 2026
6af444c
Harden post-failure cleanup and document Spark rollback limits
daniel-thom Apr 27, 2026
17bccf6
Restore _in_transaction type annotation for mypy
daniel-thom Apr 27, 2026
b03b41a
Fix Spark partitioned Parquet writes
daniel-thom Apr 27, 2026
e6755fb
Suppress pyarrow.compute attr-defined error for assume_timezone
daniel-thom Apr 27, 2026
0a8df8f
Address Copilot review: 'None' tz sentinel and cursor cleanup
daniel-thom Apr 28, 2026
13 changes: 4 additions & 9 deletions .github/workflows/ci.yml
@@ -7,7 +7,7 @@ on:
pull_request:

env:
DEFAULT_PYTHON: "3.12"
DEFAULT_PYTHON: "3.13"
DEFAULT_OS: ubuntu-latest

jobs:
@@ -29,15 +29,10 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install ".[dev,spark]"
wget https://dlcdn.apache.org/spark/spark-4.0.1/spark-4.0.1-bin-hadoop3.tgz
tar -xzf spark-4.0.1-bin-hadoop3.tgz
export SPARK_HOME=$(pwd)/spark-4.0.1-bin-hadoop3
export PATH=$SPARK_HOME/sbin:$PATH
start-thriftserver.sh
python -m pip install -e ".[dev,spark]"
- name: Run pytest with coverage
run: |
CHRONIFY_HIVE_URL=hive://localhost:10000/default pytest -v --cov --cov-report=xml
pytest -v --cov=chronify --cov-report=xml:coverage.xml
- name: codecov
uses: codecov/codecov-action@v4.2.0
if: ${{ matrix.os == env.DEFAULT_OS && matrix.python-version == env.DEFAULT_PYTHON }}
@@ -53,7 +48,7 @@
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: 3.12
python-version: 3.13
- name: Install dependencies
run: |
python -m pip install --upgrade pip
10 changes: 5 additions & 5 deletions README.md
@@ -1,7 +1,7 @@
# chronify

[![Documentation](https://img.shields.io/badge/docs-ready-blue.svg)](https://nrel.github.io/chronify)
[![codecov](https://codecov.io/gh/nrel/chronify/graph/badge.svg?token=WIY2KAOX63)](https://codecov.io/gh/nrel/chronify)
[![Documentation](https://img.shields.io/badge/docs-ready-blue.svg)](https://natlabrockies.github.io/chronify)
[![codecov](https://codecov.io/gh/natlabrockies/chronify/graph/badge.svg?token=WIY2KAOX63)](https://codecov.io/gh/natlabrockies/chronify)


This package implements a store for time series data in support of Python-based
@@ -16,7 +16,7 @@ To use DuckDB or SQLite as the backend:
$ pip install chronify
```

To use Apache Spark via Apache Thrift Server as the backend:
To use Apache Spark as the backend:
```
$ pip install "chronify[spark]"
```
@@ -32,5 +32,5 @@ $ pre-commit install
```

## License
chronify is developed under NREL Software Record SWR-21-52, "demand-side grid model".
[License](https://github.com/NREL/chronify/blob/main/LICENSE).
chronify is developed under NLR Software Record SWR-21-52, "demand-side grid model".
[License](https://github.com/NatLabRockies/chronify/blob/main/LICENSE).
4 changes: 2 additions & 2 deletions docs/conf.py
@@ -7,8 +7,8 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "Chronify"
copyright = "2024, NREL"
author = "NREL"
copyright = "2026, Alliance for Energy Innovation"
author = "NLR"
release = "0.1.0"

# -- General configuration ---------------------------------------------------
4 changes: 2 additions & 2 deletions docs/how_tos/getting_started/installation.md
@@ -37,8 +37,8 @@ To use DuckDB or SQLite as the backend:
$ pip install chronify
```

To use Apache Spark via Apache Thrift Server as the backend, you must install pyhive.
This command will install the necessary dependencies.
To use Apache Spark as the backend, install chronify with the ``spark`` extra,
which pulls in PySpark:

```{eval-rst}
.. code-block:: console
16 changes: 8 additions & 8 deletions docs/how_tos/getting_started/quick_start.md
@@ -28,16 +28,16 @@ store.ingest_tables(
time_array_id_columns=["id"],
)
)
query = "SELECT timestamp, value FROM devices WHERE id = ?"
df = store.read_query("devices", query, params=(2,))
devices = store.read_table("devices")
df = devices.filter(devices.id == 2).select("timestamp", "value").execute()
df.head()
```

```
timestamp value id
0 2020-01-01 00:00:00 0.594748 2
1 2020-01-01 01:00:00 0.608295 2
2 2020-01-01 02:00:00 0.297535 2
3 2020-01-01 03:00:00 0.870238 2
4 2020-01-01 04:00:00 0.376144 2
timestamp value
0 2020-01-01 00:00:00 0.594748
1 2020-01-01 01:00:00 0.608295
2 2020-01-01 02:00:00 0.297535
3 2020-01-01 03:00:00 0.870238
4 2020-01-01 04:00:00 0.376144
```
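
Since `read_table` now returns a lazy `ibis.Table`, further operations can be chained before anything is executed. A minimal sketch continuing the `devices` table above (the aggregation itself is illustrative, not part of the documented quick start):

```python
# Compose more Ibis operations lazily; nothing runs until .execute().
devices = store.read_table("devices")
hourly_mean = (
    devices.group_by("id")
    .aggregate(mean_value=devices.value.mean())
    .order_by("id")
)
summary_df = hourly_mean.execute()  # pandas DataFrame with one row per device id
```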
54 changes: 32 additions & 22 deletions docs/how_tos/ingest_multiple_tables.md
@@ -1,7 +1,6 @@
# How to Ingest Multiple Tables Efficiently

There are a few important considerations when ingesting many tables:
- Use one database connection.
- Avoid loading all tables into memory at once, if possible.
- Ensure additions are atomic. If anything fails, the final state should be the same as the initial
state.
@@ -14,12 +13,15 @@ device.
```python
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from chronify import DatetimeRange, Store, TableSchema, CsvTableSchema
from chronify import (
ColumnDType,
CsvTableSchema,
DatetimeRange,
Store,
TableSchema,
)

store = Store.create_in_memory_db()
resolution = timedelta(hours=1)
time_config = DatetimeRange(
time_column="timestamp",
start=datetime(2020, 1, 1, 0),
@@ -29,44 +31,52 @@ time_config = DatetimeRange(
src_schema = CsvTableSchema(
time_config=time_config,
column_dtypes=[
ColumnDType(name="timestamp", dtype=DateTime(timezone=False)),
ColumnDType(name="device1", dtype=Double()),
ColumnDType(name="device2", dtype=Double()),
ColumnDType(name="device3", dtype=Double()),
ColumnDType(name="timestamp", dtype="datetime"),
ColumnDType(name="device1", dtype="float"),
ColumnDType(name="device2", dtype="float"),
ColumnDType(name="device3", dtype="float"),
],
value_columns=["device1", "device2", "device3"],
pivoted_dimension_name="device",
)
dst_schema = TableSchema(
name="devices",
time_config=time_config,
value_column="value",
time_array_id_columns=["id"],
time_array_id_columns=["device"],
)
```

## Automated through chronfiy
## Automated through chronify
Chronify will manage the database connection and errors.
```python
store.ingest_from_csvs(
src_schema,
dst_schema,
(
"/path/to/file1.csv",
"/path/to/file2.csv",
"/path/to/file3.csv",
),
)
src_schema,
dst_schema,
)

```

## Self-Managed
Open one connection to the database for the duration of your additions. Handle errors.
Wrap the additions in a backend transaction. Any tables or views created within the block are
automatically dropped if an exception is raised.
```python
with store.engine.connect() as conn:
try:
store.ingest_from_csv(src_schema, dst_schema, "/path/to/file1.csv")
store.ingest_from_csv(src_schema, dst_schema, "/path/to/file2.csv")
store.ingest_from_csv(src_schema, dst_schema, "/path/to/file3.csv")
except Exception:
conn.rollback()
with store.backend.transaction():
store.ingest_from_csv("/path/to/file1.csv", src_schema, dst_schema)
store.ingest_from_csv("/path/to/file2.csv", src_schema, dst_schema)
store.ingest_from_csv("/path/to/file3.csv", src_schema, dst_schema)
```

```{note}
Real database transaction semantics depend on the backend. The DuckDB and SQLite backends issue
a real `BEGIN` / `COMMIT` / `ROLLBACK` around the block, so partial inserts to existing tables
are rolled back on failure. The Spark backend does not support transactions; the context
manager falls back to best-effort cleanup that drops any tables or views created inside the
block when an exception is raised, but rows appended to pre-existing tables cannot be
rolled back.
```
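
A short sketch of what this means in practice, assuming the `Store` and `backend.transaction()` API shown above (file paths are placeholders):

```python
# Hypothetical illustration of the per-backend rollback behavior described
# in the note above.
try:
    with store.backend.transaction():
        store.ingest_from_csv("/path/to/file1.csv", src_schema, dst_schema)
        store.ingest_from_csv("/path/to/bad_file.csv", src_schema, dst_schema)  # raises
except Exception:
    # DuckDB/SQLite: all inserts from the block are rolled back.
    # Spark: tables/views created inside the block are dropped, but rows
    # appended to pre-existing tables remain.
    raise
```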
4 changes: 2 additions & 2 deletions docs/how_tos/map_time_config.md
@@ -51,7 +51,7 @@ schema = TableSchema(
)
store = Store.create_in_memory_db()
store.ingest_table(df, schema)
store.read_query(src_table_name, f"SELECT * FROM {src_table_name} LIMIT 5").head()
store.read_table(src_table_name).limit(5).execute()
```

```
Expand All @@ -77,7 +77,7 @@ dst_schema = TableSchema(
)
)
store.map_table_time_config(src_table_name, dst_schema)
store.read_query(dst_table_name, f"SELECT * FROM {dst_table_name} LIMIT 5").head()
store.read_table(dst_table_name).limit(5).execute()
```

```
27 changes: 14 additions & 13 deletions docs/how_tos/spark_backend.md
@@ -14,17 +14,9 @@ $ tar -xzf spark-4.0.1-bin-hadoop3.tgz
$ export SPARK_HOME=$(pwd)/spark-4.0.1-bin-hadoop3
```

Start a Thrift server. This allows JDBC clients to send SQL queries to an in-process Spark cluster
running in local mode.
```
$ $SPARK_HOME/sbin/start-thriftserver.sh --master=spark://$(hostname):7077
```

The URL to connect to this server is `hive://localhost:10000/default`

## Installation on an HPC
The chronify development team uses these
[scripts](https://github.com/NREL/HPC/tree/master/applications/spark) to run Spark on NREL's HPC.
The chronify development team uses this
[package](https://github.com/NatLabRockies/sparkctl) to run Spark on NLR's HPC.

## Chronify Usage
This example creates a chronify Store with Spark as the backend and then adds a view to a Parquet
@@ -70,14 +62,23 @@

```python
from chronify import Store
from chronify.ibis.spark_backend import SparkBackend

store = Store(backend=SparkBackend())
store.create_view_from_parquet("data.parquet", schema)
```

Alternatively, pass a pre-configured PySpark session:
```python
from pyspark.sql import SparkSession

store = Store.create_new_hive_store("hive://localhost:10000/default")
store.create_view_from_parquet("data.parquet")
session = SparkSession.builder.master("local").getOrCreate()
store = Store(backend=SparkBackend(session=session))
```

Verify the data:
```python
store.read_table(schema.name).head()
store.read_table(schema.name).execute().head()
```
```
timestamp id value
20 changes: 4 additions & 16 deletions docs/index.md
@@ -4,7 +4,7 @@ This package implements validation, mapping, and storage of time series data in
Python-based modeling packages.

## Features
- Stores time series data in any database supported by SQLAlchemy.
- Stores time series data in any database supported by Ibis (DuckDB, SQLite, and Spark).
- Supports data ingestion in a variety of file formats and configurations.
- Supports efficient retrieval of time series through SQL queries.
- Validates consistency of timestamps and resolution.
@@ -23,24 +23,12 @@ Python-based modeling packages.
```

## Supported Backends
While chronify should work with any database supported by SQLAlchemy, it has been tested with
the following:
Chronify uses [Ibis](https://ibis-project.org) for all database operations. The following
backends are supported:

- DuckDB (default)
- SQLite
- Apache Spark through Apache Thrift Server

DuckDB and SQLite are fully supported.

Because of limitations in the backend software, chronify functionality with Spark is limited to
the following:

- Create a view into an existing Parquet file (or directory).
- Perform time series checks.
- Map between time configurations.
- Write output data to Parquet files.

There is no support for creating tables and ingesting data with Spark.
- Apache Spark (via PySpark)
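
For orientation, a minimal sketch of choosing a backend; the constructors mirror those shown elsewhere in this PR, and the Spark import path is taken from the Spark how-to, so treat it as an assumption:

```python
from chronify import Store

# Default: an in-memory DuckDB store.
store = Store.create_in_memory_db()

# Spark, assuming the optional extra is installed (pip install "chronify[spark]"):
# from chronify.ibis.spark_backend import SparkBackend
# store = Store(backend=SparkBackend())
```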

## How to use this guide
- Refer to [How Tos](#how-tos-page) for step-by-step instructions on creating a store and ingesting data.
36 changes: 17 additions & 19 deletions pyproject.toml
@@ -27,24 +27,20 @@ classifiers = [
]
dependencies = [
"duckdb ~= 1.1.0",
"duckdb_engine",
"ibis-framework[duckdb,sqlite] >= 9.0",
"loguru",
"pandas >= 2.2, < 3",
"pyarrow",
"pydantic >= 2.7, < 3",
"pytz",
"rich",
"sqlalchemy == 2.0.37",
"tzdata",
# Required by pyhive
"future",
"python-dateutil",
]

[project.optional-dependencies]
spark = [
"thrift",
"thrift_sasl",
"ibis-framework[pyspark]",
"pyspark == 4.0.0",
]

dev = [
@@ -63,25 +59,28 @@ dev = [
"sphinx-tabs~=3.4",
]

[project.entry-points."sqlalchemy.dialects"]
hive = "pyhive.sqlalchemy_hive:HiveDialect"
"hive.http" = "pyhive.sqlalchemy_hive:HiveHTTPDialect"
"hive.https" = "pyhive.sqlalchemy_hive:HiveHTTPSDialect"

[project.urls]
Documentation = "https://github.com/NREL/chronify#readme"
Issues = "https://github.com/NREL/chronify/issues"
Source = "https://github.com/NREL/chronify"
Documentation = "https://github.com/NatLabRockies/chronify#readme"
Issues = "https://github.com/NatLabRockies/chronify/issues"
Source = "https://github.com/NatLabRockies/chronify"

[tool.mypy]
files = [
"src",
]
exclude = [
"src/chronify/_vendor/*",
]
strict = true

[[tool.mypy.overrides]]
module = [
"ibis",
"ibis.*",
"pyarrow",
"pyarrow.*",
"pyspark",
"pyspark.*",
]
ignore_missing_imports = true

[tool.pytest.ini_options]
pythonpath = "src"
minversion = "6.0"
@@ -99,7 +98,6 @@ exclude = [
"dist",
"env",
"venv",
"src/chronify/_vendor/*",
]

line-length = 99