
ci: Add codspeed for performance monitoring #2516


Draft: wants to merge 13 commits into main

13 changes: 11 additions & 2 deletions .github/workflows/check_tpch_queries.yml
@@ -2,6 +2,7 @@ name: Tests for TPCH Queries

 on:
   pull_request:
+  workflow_dispatch:
   push:
     branches: [main]

@@ -33,5 +34,13 @@ jobs:
           uv pip install -U --pre duckdb --system
       - name: generate-data
         run: cd tpch && python generate_data.py
-      - name: tpch-tests
-        run: cd tpch && pytest tests
+      - name: tpch-tests
+        if: ${{ !contains(github.event.pull_request.labels.*.name, 'performance')}}
+        run: cd tpch && pytest tests/queries_test.py::test_execute_query
+      - name: tpch-benchmark
+        if: ${{ contains(github.event.pull_request.labels.*.name, 'performance')}}
+        uses: CodSpeedHQ/action@v3
+        with:
+          run: |
+            uv pip install "numpy<2" pytest-codspeed --system
+            cd tpch && pytest tests/queries_test.py::test_benchmark_query --codspeed
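
Since the benchmark step is new here, a minimal sketch of what pytest-codspeed measures may help. This is illustrative only, not part of the PR; it assumes pytest-codspeed is installed and the test is run with `pytest --codspeed`, and the test name is hypothetical:

    # Illustrative only: pytest-codspeed provides a `benchmark` fixture; calling
    # it executes the wrapped callable under instrumentation and returns its result.
    from pytest_codspeed.plugin import BenchmarkFixture


    def test_sum_benchmark(benchmark: BenchmarkFixture) -> None:
        result = benchmark(lambda: sum(range(1_000)))
        assert result == 499500

On CI, the CodSpeedHQ/action step then uploads these measurements to CodSpeed so regressions can be flagged on the PR.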
1 change: 0 additions & 1 deletion .github/workflows/pytest-pyspark.yml
@@ -8,7 +8,6 @@ env:
   PY_COLORS: 1
 
 jobs:
-
   pytest-pyspark-constructor:
     if: ${{ contains(github.event.pull_request.labels.*.name, 'pyspark') || contains(github.event.pull_request.labels.*.name, 'spark-like') || contains(github.event.pull_request.labels.*.name, 'release')}}
     strategy:
42 changes: 29 additions & 13 deletions tpch/execute.py
@@ -3,6 +3,8 @@
 import argparse
 from importlib import import_module
 from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Any
 
 import dask.dataframe as dd
 import duckdb
@@ -15,6 +17,9 @@

 import narwhals as nw
 
+if TYPE_CHECKING:
+    from types import ModuleType
+
 pd.options.mode.copy_on_write = True
 pd.options.future.infer_string = True  # pyright: ignore[reportAttributeAccessIssue, reportOptionalMemberAccess]
 pl.Config.set_fmt_float("full")
@@ -87,31 +92,42 @@
"q22": (CUSTOMER_PATH, ORDERS_PATH),
}

ROOT_PATH = Path(__file__).resolve().parent.parent
# Directory containing all the query scripts
QUERIES_DIR = ROOT_PATH / "queries"

def execute_query(query_id: str) -> None:

def _execute_query_single_backend(
query_id: str, native_namespace: ModuleType, **kwargs: Any
) -> pl.DataFrame:
query_module = import_module(f"tpch.queries.{query_id}")
data_paths = QUERY_DATA_PATH_MAP[query_id]

expected = pl.read_parquet(DATA_DIR / f"result_{query_id}.parquet")
return (
query_module.query(
*(
nw.scan_parquet(str(path), backend=native_namespace, **kwargs)
for path in data_paths
)
)
.lazy()
.collect(backend=nw.Implementation.POLARS)
.to_native()
)


def execute_query(query_id: str) -> None:
expected = pl.read_parquet(DATA_DIR / f"result_{query_id}.parquet")
for backend, (native_namespace, kwargs) in BACKEND_NAMESPACE_KWARGS_MAP.items():
if backend in {"duckdb", "sqlframe"} and query_id in DUCKDB_SKIPS:
print(f"\nSkipping {query_id} for {backend}") # noqa: T201
continue

print(f"\nRunning {query_id} with {backend=}") # noqa: T201
result: pl.DataFrame = (
query_module.query(
*(
nw.scan_parquet(str(path), backend=native_namespace, **kwargs)
for path in data_paths
)
)
.lazy()
.collect(backend=nw.Implementation.POLARS)
.to_native()
)

result = _execute_query_single_backend(
query_id=query_id, native_namespace=native_namespace, **kwargs
)
assert_frame_equal(expected, result, check_dtypes=False)


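The net effect of this refactor is that query execution is separated from validation, so a benchmark can time `_execute_query_single_backend` alone. A hedged usage sketch, assuming `DATA_DIR` is importable from `tpch.execute` as the diff suggests; the backend choice and empty kwargs are illustrative:

    # Hypothetical usage sketch, not part of the PR: run q1 with the pandas
    # backend and compare against the stored answer, as execute_query does.
    import pandas as pd
    import polars as pl
    from polars.testing import assert_frame_equal

    from tpch.execute import DATA_DIR, _execute_query_single_backend

    result = _execute_query_single_backend(query_id="q1", native_namespace=pd)
    expected = pl.read_parquet(DATA_DIR / "result_q1.parquet")
    assert_frame_equal(expected, result, check_dtypes=False)
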
11 changes: 6 additions & 5 deletions tpch/generate_data.py
@@ -8,10 +8,11 @@
 import pyarrow.csv as pc
 import pyarrow.parquet as pq
 
-if not Path("data").exists():
-    Path("data").mkdir()
-
 SCALE_FACTOR = 0.1

dangotbanned (Member) commented on May 18, 2025:

@FBruzzesi (#805 (comment))

In #972 we were using TPCH with a 0.25 scale factor and, IIRC, it was taking ~40 minutes to run. That's a bit much for what I would consider fast iteration - maybe a scale factor of 0.1 is more reasonable to start with.

IIRC the docs for the duckdb TPCH tests used 0.01 - so we can go lower.

I found the bit in the docs that uses 0.01 (https://duckdb.org/docs/0.10/extensions/tpch#listing-expected-answers):

> To produce the expected results for all queries on scale factors 0.01, 0.1, and 1, run

If we can run these with 10x less data, surely we should, right?

The current run has been going for almost 2 hours πŸ˜…
(https://github.com/narwhals-dev/narwhals/actions/runs/15098359607/job/42436026213?pr=2516)

Reply from the PR author (Member):

> The current run has been going for almost 2 hours πŸ˜…

Yes, I have been monitoring it - it's a bit odd, isn't it? I am not fully sure what's going on πŸ€”

+data_path = Path("data")
+data_path.mkdir(parents=True, exist_ok=True)
 
 con = duckdb.connect(database=":memory:")
 con.execute("INSTALL tpch; LOAD tpch")
 con.execute(f"CALL dbgen(sf={SCALE_FACTOR})")
@@ -37,7 +38,7 @@
         else:
             new_schema.append(field)
     tbl_arrow = tbl_arrow.cast(pa.schema(new_schema))
-    pq.write_table(tbl_arrow, Path("data") / f"{t}.parquet")
+    pq.write_table(tbl_arrow, data_path / f"{t}.parquet")


results = con.query(
@@ -53,4 +54,4 @@
     tbl_answer = pc.read_csv(
         io.BytesIO(answer.encode("utf-8")), parse_options=pc.ParseOptions(delimiter="|")
     )
-    pq.write_table(tbl_answer, Path("data") / f"result_q{query_nr}.parquet")
+    pq.write_table(tbl_answer, data_path / f"result_q{query_nr}.parquet")
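
For reference, the scale factor debated in the review thread maps directly onto the duckdb dbgen call this script uses. A small standalone sketch, not part of the PR; the sf value is illustrative, per the suggestion to try 0.01:

    # Illustrative sketch: generate TPC-H data at a smaller scale factor using
    # duckdb's tpch extension, mirroring tpch/generate_data.py.
    import duckdb

    con = duckdb.connect(database=":memory:")
    con.execute("INSTALL tpch; LOAD tpch")
    con.execute("CALL dbgen(sf=0.01)")  # sf=0.1 is what the PR settles on
    print(con.execute("SELECT count(*) FROM lineitem").fetchone())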
48 changes: 37 additions & 11 deletions tpch/tests/queries_test.py
@@ -1,25 +1,51 @@
 from __future__ import annotations
 
-import subprocess
-import sys
 from pathlib import Path
+from typing import TYPE_CHECKING
+from typing import Any
 
 import pytest
 
+from tpch.execute import BACKEND_NAMESPACE_KWARGS_MAP
+from tpch.execute import DUCKDB_SKIPS
+from tpch.execute import _execute_query_single_backend
+from tpch.execute import execute_query
+
+if TYPE_CHECKING:
+    from types import ModuleType
+
+    from pytest_codspeed.plugin import BenchmarkFixture
+
+
 ROOT_PATH = Path(__file__).resolve().parent.parent
 # Directory containing all the query scripts
 QUERIES_DIR = ROOT_PATH / "queries"
 
 
 @pytest.mark.parametrize("query_path", QUERIES_DIR.glob("q[1-9]*.py"))
-def test_execute_scripts(query_path: Path) -> None:
+def test_execute_query(query_path: Path) -> None:
     print(f"executing query {query_path.stem}")  # noqa: T201
-    result = subprocess.run(  # noqa: S603
-        [sys.executable, "-m", "execute", str(query_path.stem)],
-        capture_output=True,
-        text=True,
-        check=False,
-    )
-    assert result.returncode == 0, (
-        f"Script {query_path} failed with error: {result.stderr}"
-    )
+    _ = execute_query(query_id=query_path.stem)
+
+
+@pytest.mark.parametrize("query_path", QUERIES_DIR.glob("q[1-9]*.py"))
+@pytest.mark.parametrize(
+    ("backend", "namespace_and_kwargs"), BACKEND_NAMESPACE_KWARGS_MAP.items()
+)
+def test_benchmark_query(
+    benchmark: BenchmarkFixture,
+    query_path: Path,
+    backend: str,
+    namespace_and_kwargs: tuple[ModuleType, dict[str, Any]],
+) -> None:
+    query_id = query_path.stem
+    native_namespace, kwargs = namespace_and_kwargs
+
+    if backend in {"duckdb", "sqlframe"} and query_id in DUCKDB_SKIPS:
+        pytest.skip()
+
+    _ = benchmark(
+        lambda: _execute_query_single_backend(
+            query_id=query_id, native_namespace=native_namespace, **kwargs
+        )
+    )
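
As a usage note, the benchmark subset can presumably be run locally the same way the workflow invokes it. A sketch, assuming pytest-codspeed is installed and the working directory is tpch/:

    # Hedged sketch: invoke only the benchmark tests with CodSpeed
    # instrumentation, mirroring the tpch-benchmark CI step.
    import pytest

    raise SystemExit(
        pytest.main(["tests/queries_test.py::test_benchmark_query", "--codspeed"])
    )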