#!/usr/bin/env python3
"""Validate benchmark outputs against regression thresholds.

This script is intended to run in CI after ``benchmarks/bench_kll.py``. It reads
CSV outputs from ``bench_out`` (or a supplied directory) and enforces
conservative performance and accuracy targets so regressions surface early.
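
The checks expect the following artifacts and columns in the output directory:

* ``accuracy.csv`` with ``mode`` and ``abs_error``
* ``update_throughput.csv`` with ``updates_per_sec``
* ``query_latency.csv`` with ``latency_us``
* ``merge.csv`` with ``merge_time_s``

Example invocation (the script path below is illustrative; point it at wherever
this file is committed)::

    python benchmarks/validate_bench.py bench_out --summary bench_summary.md

The process exits with a non-zero status when any threshold is violated, which
fails the CI job.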
"""

from __future__ import annotations

import argparse
import json
from pathlib import Path
from typing import Dict, List, Tuple

import pandas as pd


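# Regression gates. Values are deliberately conservative (see module docstring);
# each constant bounds a single column of the corresponding CSV artifact.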
ACCURACY_ABS_ERROR_MAX = 0.5
THROUGHPUT_MIN_UPS = 15_000
LATENCY_P95_MAX_US = 1_000.0
MERGE_TIME_MAX_S = 2.0


def _load_csv(path: Path) -> pd.DataFrame:
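    """Load one benchmark CSV, failing fast if the artifact is missing."""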
    if not path.exists():
        raise FileNotFoundError(f"Expected benchmark artifact missing: {path}")
    return pd.read_csv(path)


def _check_accuracy(df: pd.DataFrame) -> Tuple[bool, Dict[str, float]]:
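    """Return (ok, worst abs_error per mode plus an ``overall`` maximum)."""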
    worst = df.groupby(["mode"])["abs_error"].max().to_dict()
    overall = float(df["abs_error"].max()) if not df.empty else 0.0
    ok = overall <= ACCURACY_ABS_ERROR_MAX
    worst.setdefault("overall", overall)
    return ok, worst


def _check_throughput(df: pd.DataFrame) -> Tuple[bool, float]:
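    """Return (ok, minimum updates_per_sec observed)."""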
    minimum = float(df["updates_per_sec"].min()) if not df.empty else float("inf")
    return minimum >= THROUGHPUT_MIN_UPS, minimum


def _check_latency(df: pd.DataFrame) -> Tuple[bool, float]:
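    """Return (ok, p95 of latency_us); an empty frame passes trivially."""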
    if df.empty:
        return True, 0.0
    p95 = float(df["latency_us"].quantile(0.95))
    return p95 <= LATENCY_P95_MAX_US, p95


def _check_merge(df: pd.DataFrame) -> Tuple[bool, float]:
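    """Return (ok, maximum merge_time_s); an empty frame passes trivially."""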
    if df.empty:
        return True, 0.0
    maximum = float(df["merge_time_s"].max())
    return maximum <= MERGE_TIME_MAX_S, maximum


def _summarise(results: Dict[str, Dict[str, object]]) -> str:
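    """Render the check results as a markdown table plus a JSON appendix."""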
    lines: List[str] = ["# Benchmark validation summary", ""]
    lines.append("| Check | Threshold | Observed | Status |")
    lines.append("| --- | --- | --- | --- |")
    for name, payload in results.items():
        threshold = payload["threshold"]
        observed = payload["observed"]
        status = "PASS" if payload["ok"] else "FAIL"
        lines.append(f"| {name} | {threshold} | {observed} | {status} |")
    lines.append("")
    lines.append("```json")
    lines.append(json.dumps(results, indent=2, sort_keys=True))
    lines.append("```")
    return "\n".join(lines)


def main() -> None:
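    """Validate all benchmark artifacts and fail the run on any regression."""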
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("outdir", nargs="?", default="bench_out", help="Directory containing benchmark CSVs")
    parser.add_argument("--summary", default="bench_summary.md", help="Filename for the generated markdown summary")
    args = parser.parse_args()

    outdir = Path(args.outdir)
    accuracy = _load_csv(outdir / "accuracy.csv")
    throughput = _load_csv(outdir / "update_throughput.csv")
    latency = _load_csv(outdir / "query_latency.csv")
    merge = _load_csv(outdir / "merge.csv")

    summary: Dict[str, Dict[str, object]] = {}

    accuracy_ok, accuracy_obs = _check_accuracy(accuracy)
    summary["Accuracy abs error"] = {
        "threshold": f"<= {ACCURACY_ABS_ERROR_MAX}",
        "observed": {mode: round(value, 6) for mode, value in accuracy_obs.items()},
        "ok": accuracy_ok,
    }

    throughput_ok, throughput_obs = _check_throughput(throughput)
    summary["Update throughput"] = {
        "threshold": f">= {THROUGHPUT_MIN_UPS} updates/sec",
        "observed": round(throughput_obs, 2),
        "ok": throughput_ok,
    }

    latency_ok, latency_obs = _check_latency(latency)
    summary["Query latency p95"] = {
        "threshold": f"<= {LATENCY_P95_MAX_US} µs",
        "observed": round(latency_obs, 2),
        "ok": latency_ok,
    }

    merge_ok, merge_obs = _check_merge(merge)
    summary["Merge time"] = {
        "threshold": f"<= {MERGE_TIME_MAX_S} s",
        "observed": round(merge_obs, 3),
        "ok": merge_ok,
    }

    summary_path = outdir / args.summary
    summary_path.write_text(_summarise(summary), encoding="utf-8")

    print(summary_path.read_text(encoding="utf-8"))

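    # A non-zero exit code here is what fails the CI job.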
    if not all(item["ok"] for item in summary.values()):
        raise SystemExit("Benchmark regression detected; see summary above.")


if __name__ == "__main__":
    main()