substrait-io · EpsilonPrime · May 26, 2025 · May 26, 2025
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
@@ -0,0 +1,34 @@
+name: Run linter and formatter
+
+on:
+  pull_request:
+  push:
+    branches: [ main ]
+    tags: [ 'v*.*.*' ]
+
+permissions:
+  contents: read
+
+jobs:
+  test:
+    name: Lint and Format
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python: ["3.9"]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - name: Install uv with python
+        uses: astral-sh/setup-uv@v6
+        with:
+          python-version: ${{ matrix.python }}
+      - name: Run ruff linter  # NOTE(review): ruff pin was garbled in this copy; 0.11.11 is a reconstruction - confirm
+        run: |
+          uvx ruff@0.11.11 check
+      - name: Run ruff formatter  # --check: exit non-zero on unformatted files instead of rewriting the checkout
+        run: |
+          uvx ruff@0.11.11 format --check
diff --git a/examples/adbc_example.py b/examples/adbc_example.py
@@ -25,11 +25,15 @@
     names=["ints", "strs"],
 )
 
+
 def read_adbc_named_table(name: str, conn):
     pa_schema = conn.adbc_get_table_schema(name)
-    substrait_schema = pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    substrait_schema = (
+        pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    )
     return read_named_table(name, substrait_schema)
 
+
 with adbc_driver_duckdb.dbapi.connect(":memory:") as conn:
     with conn.cursor() as cur:
         cur.adbc_ingest("AnswerToEverything", data)
@@ -38,7 +42,14 @@ def read_adbc_named_table(name: str, conn):
         cur.executescript("LOAD substrait;")
 
         table = read_adbc_named_table("AnswerToEverything", conn)
-        table = filter(table, expression=scalar_function('functions_comparison.yaml', 'gte', column('ints'), literal(3, i64())))
+        table = filter(
+            table,
+            expression=scalar_function(
+                "functions_comparison.yaml",
+                "gte",
+                expressions=[column("ints"), literal(3, i64())],
+            ),
+        )
 
         cur.execute(table(registry).SerializeToString())
-        print(cur.fetch_arrow_table())
\ No newline at end of file
+        print(cur.fetch_arrow_table())
diff --git a/examples/builder_example.py b/examples/builder_example.py
@@ -6,19 +6,20 @@
 registry = ExtensionRegistry(load_default_extensions=True)
 
 ns = named_struct(
-    names=["id", "is_applicable"], 
-    struct=struct(
-        types=[
-            i64(nullable=False), 
-            boolean()
-        ]
-    )
+    names=["id", "is_applicable"], struct=struct(types=[i64(nullable=False), boolean()])
 )
 
-table = read_named_table('example_table', ns)
-table = filter(table, expression=column('is_applicable'))
-table = filter(table, expression=scalar_function('functions_comparison.yaml', 'lt', column('id'), literal(100, i64())))
-table = project(table, expressions=[column('id')])
+table = read_named_table("example_table", ns)
+table = filter(table, expression=column("is_applicable"))
+table = filter(
+    table,
+    expression=scalar_function(
+        "functions_comparison.yaml",
+        "lt",
+        expressions=[column("id"), literal(100, i64())],
+    ),
+)
+table = project(table, expressions=[column("id")])
 
 print(table(registry))
 

diff --git a/examples/duckdb_example.py b/examples/duckdb_example.py
@@ -18,7 +18,7 @@
 
 try:
     duckdb.install_extension("substrait")
-except:
+except duckdb.HTTPException:  # install failed over HTTP; try community repo
     duckdb.install_extension("substrait", repository="community")
 duckdb.load_extension("substrait")
 
@@ -29,14 +29,27 @@
 
 registry = ExtensionRegistry(load_default_extensions=True)
 
+
 def read_duckdb_named_table(name: str, conn):
     pa_schema = conn.sql(f"SELECT * FROM {name} LIMIT 0").arrow().schema
-    substrait_schema = pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    substrait_schema = (
+        pa_substrait.serialize_schema(pa_schema).to_pysubstrait().base_schema
+    )
     return read_named_table(name, substrait_schema)
 
+
 table = read_duckdb_named_table("customer", duckdb)
-table = filter(table, expression=scalar_function('functions_comparison.yaml', 'equal', column('c_nationkey'), literal(3, i32())))
-table = project(table, expressions=[column('c_name'), column('c_address'), column('c_nationkey')])
+table = filter(
+    table,
+    expression=scalar_function(
+        "functions_comparison.yaml",
+        "equal",
+        expressions=[column("c_nationkey"), literal(3, i32())],
+    ),
+)
+table = project(
+    table, expressions=[column("c_name"), column("c_address"), column("c_nationkey")]
+)
 
 sql = f"CALL from_substrait_json('{dump_json(table(registry))}')"
 print(duckdb.sql(sql))
diff --git a/examples/pyarrow_example.py b/examples/pyarrow_example.py
@@ -11,21 +11,18 @@
 import substrait
 from substrait.builders.plan import project, read_named_table
 
-arrow_schema = pa.schema([
-    pa.field("x", pa.int32()),
-    pa.field("y", pa.int32())
-])
+arrow_schema = pa.schema([pa.field("x", pa.int32()), pa.field("y", pa.int32())])
 
-substrait_schema = pa_substrait.serialize_schema(arrow_schema).to_pysubstrait().base_schema
+substrait_schema = (
+    pa_substrait.serialize_schema(arrow_schema).to_pysubstrait().base_schema
+)
 
 substrait_expr = pa_substrait.serialize_expressions(
-    exprs=[pc.field("x") + pc.field("y")],
-    names=["total"],
-    schema=arrow_schema
+    exprs=[pc.field("x") + pc.field("y")], names=["total"], schema=arrow_schema
 )
 
 pysubstrait_expr = substrait.proto.ExtendedExpression.FromString(bytes(substrait_expr))
 
 table = read_named_table("example", substrait_schema)
 table = project(table, expressions=[pysubstrait_expr])(None)
-print(table)
\ No newline at end of file
+print(table)
diff --git a/pyproject.toml b/pyproject.toml
@@ -30,8 +30,3 @@ respect-gitignore = true
 target-version = "py39"
 # never autoformat upstream or generated code
 exclude = ["third_party/", "src/substrait/gen"]
-# do not autofix the following (will still get flagged in lint)
-lint.unfixable = [
-  "F401",  # unused imports
-  "T201",  # print statements
-]