3 changes: 3 additions & 0 deletions baselines/verifiableagg/.gitignore
@@ -0,0 +1,3 @@
artifacts/
__pycache__/
*.pyc
135 changes: 135 additions & 0 deletions baselines/verifiableagg/README.md
@@ -0,0 +1,135 @@
---
title: "Verifiable Aggregation Workflow"
url: https://github.com/rwilliamspbg-ops/Sovereign-Mohawk-Proto
labels: [verification, aggregation, reproducibility, message-api, synthetic-data]
dataset: [synthetic]
---

## Verifiable Aggregation Workflow

> Note: If you use this baseline in your work, please cite Flower and any upstream work that inspired your implementation.

**Paper/Reference:** [Sovereign-Mohawk-Proto](https://github.com/rwilliamspbg-ops/Sovereign-Mohawk-Proto)

**Authors:** Community contribution by rwilliamspbg-ops

**Abstract:** This baseline demonstrates a reproducible federated learning workflow in Flower where standard FedAvg aggregation is augmented with optional server-side verification hooks. At each round, the server recomputes the weighted aggregate from raw client updates, compares it to the strategy output under a configurable tolerance, and records deterministic hashes and verification outcomes in a JSON report.
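The round-level check described above can be sketched as follows. This is a minimal illustration, not the baseline's actual code: the function name `verify_round` and the update format (pairs of example count and list of NumPy arrays) are assumptions for exposition.

```python
import numpy as np


def verify_round(client_updates, strategy_result, tolerance=1e-6):
    """Recompute example-weighted FedAvg from raw client updates and
    compare it layer by layer against the strategy's aggregate.

    client_updates: list of (num_examples, list_of_ndarrays) pairs.
    strategy_result: list of ndarrays produced by the strategy.
    Returns (passed, max_abs_diff).
    """
    total_examples = sum(n for n, _ in client_updates)
    num_layers = len(client_updates[0][1])
    # Replay the weighted average independently of the strategy
    replay = [
        sum(n * params[i] for n, params in client_updates) / total_examples
        for i in range(num_layers)
    ]
    max_abs_diff = max(
        float(np.max(np.abs(r - s))) for r, s in zip(replay, strategy_result)
    )
    return max_abs_diff <= tolerance, max_abs_diff
```

With exact FedAvg the replayed aggregate matches bit-for-bit, so the observed difference is zero or near machine precision, matching the expected results below.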

## About this baseline

**What is implemented:** A minimal Message API Flower baseline with deterministic synthetic data, optional verification checks around aggregation outputs, and benchmark-friendly reporting scripts.

**Datasets:** Fully deterministic synthetic binary classification data generated per client partition.

**Hardware Setup:** CPU-only runs are supported. Default configuration (8 clients, 5 rounds) typically finishes in under a minute on a laptop-class CPU.

**Contributors:** rwilliamspbg-ops, Flower community maintainers

## Experimental Setup

**Task:** Binary classification.

**Model:** Small MLP with two linear layers and one ReLU.
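A network of this shape can be written in a few lines of PyTorch. The class name, hidden width, and two-logit output head below are illustrative assumptions, not taken from the baseline source:

```python
import torch
import torch.nn as nn


class SmallMLP(nn.Module):
    """Two linear layers with a single ReLU in between."""

    def __init__(self, num_features: int = 10, hidden_dim: int = 32):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(num_features, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 2),  # two logits for binary classification
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.net(x)
```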

**Dataset:**

| Property | Value |
| --- | --- |
| Source | Generated on the fly (no downloads) |
| Features | 10 float features |
| Labels | Binary (0/1) |
| Clients | 8 by default |
| Local train examples/client | 128 |
| Local val examples/client | 64 |
| Partitioning | Deterministic client-specific distribution shift |
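One way the deterministic per-client partitions in the table above can be produced is to seed a generator with both the global seed and the client id, and apply a client-specific mean shift. The function name and the exact shift/labeling rules here are assumptions for illustration only:

```python
import numpy as np


def make_partition(client_id: int, num_examples: int = 128,
                   num_features: int = 10, seed: int = 2026):
    """Generate a deterministic synthetic binary-classification partition.

    Seeding with (seed, client_id) makes every run reproducible, while a
    client-specific mean shift gives each partition its own distribution.
    """
    rng = np.random.default_rng([seed, client_id])
    shift = 0.1 * client_id  # deterministic client-specific distribution shift
    features = rng.normal(loc=shift, scale=1.0,
                          size=(num_examples, num_features))
    # Labels come from a fixed linear rule, so they are reproducible too
    weights = np.linspace(-1.0, 1.0, num_features)
    labels = (features @ weights > 0.0).astype(np.int64)
    return features, labels
```

Because no randomness escapes the seeded generator, calling `make_partition` twice with the same arguments yields identical data, which is what makes the end-to-end verification hashes reproducible.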

**Training Hyperparameters (default):**

| Hyperparameter | Value |
| --- | --- |
| num-server-rounds | 5 |
| fraction-train | 1.0 |
| fraction-evaluate | 1.0 |
| local-epochs | 1 |
| learning-rate | 0.05 |
| batch-size | 32 |
| random-seed | 2026 |
| verify-aggregation | true |
| verification-tolerance | 1e-6 |

The number of simulated clients is controlled by the federation setting
`options.num-supernodes` (default: 8), not by `--run-config`.

## Environment Setup

```bash
# Create the virtual environment
pyenv virtualenv 3.12.12 verifiableagg

# Activate it
pyenv activate verifiableagg

# Install baseline
pip install -e .

# If you are contributing changes and want to run lint/type checks
pip install -e ".[dev]"
```

For contributor checks used in Flower baselines CI:

```bash
cd ..
./dev/test-baseline-structure.sh verifiableagg
./dev/test-baseline.sh verifiableagg
```

## Running the Experiments

```bash
# Run with defaults from pyproject.toml
flwr run .

# Override selected values from the CLI
flwr run . --run-config "num-server-rounds=10 verify-aggregation=true random-seed=2026"

# Run benchmark helper script (train + report check)
bash run_benchmark.sh
```

## Verification Outputs and Reproducibility

After each run, artifacts are written to the directory set by `artifacts-dir` (default: `artifacts`):

- `artifacts/final_model.pt`
- `artifacts/report.json`

The report includes:

- Effective run configuration
- Per-round aggregated train/eval metrics
- Per-round verification status (pass/fail)
- Maximum absolute replay difference
- Deterministic SHA256 hash of aggregated parameters per round
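A deterministic parameter hash like the one listed above can be computed by feeding each layer's dtype, shape, and raw bytes into SHA256 in a fixed order. This sketch assumes the aggregated parameters are available as a list of NumPy arrays; the function name is illustrative:

```python
import hashlib

import numpy as np


def hash_parameters(layers) -> str:
    """SHA256 over each layer's dtype, shape, and raw bytes, in order."""
    digest = hashlib.sha256()
    for layer in layers:
        arr = np.ascontiguousarray(layer)  # guarantee a stable byte layout
        digest.update(str(arr.dtype).encode())
        digest.update(str(arr.shape).encode())
        digest.update(arr.tobytes())
    return digest.hexdigest()
```

Including dtype and shape alongside the raw bytes ensures that two arrays with the same bytes but different shapes or dtypes hash differently.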

To summarize and validate verification outcomes:

```bash
python benchmark_report.py --report-path artifacts/report.json
```

This command exits non-zero if any round fails verification, which makes it suitable for CI or benchmark automation.

## Expected Results

With default settings, all rounds should pass verification with very small numerical replay error (typically near machine precision). Example benchmark output:

```text
round num_replies max_abs_diff passed
1 8 0.00000000e+00 1
2 8 0.00000000e+00 1
3 8 0.00000000e+00 1
4 8 0.00000000e+00 1
5 8 0.00000000e+00 1
All rounds verified. Max observed absolute difference: 0.00000000e+00
```
51 changes: 51 additions & 0 deletions baselines/verifiableagg/benchmark_report.py
@@ -0,0 +1,51 @@
"""Summarize verification report and exit non-zero on verification failure."""

from __future__ import annotations

import argparse
import json
from pathlib import Path


def parse_args() -> argparse.Namespace:
"""Parse command-line arguments."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--report-path",
type=Path,
default=Path("artifacts/report.json"),
help="Path to report generated by the ServerApp.",
)
return parser.parse_args()


def main() -> int:
"""Print benchmark-ready verification summary."""
args = parse_args()
report = json.loads(args.report_path.read_text(encoding="utf-8"))

verification_rounds = report.get("verification_rounds", [])
if not verification_rounds:
print("No verification rounds found in report.")
return 1

failed = [item for item in verification_rounds if not item.get("passed", False)]

print("round\tnum_replies\tmax_abs_diff\tpassed")
for item in verification_rounds:
print(
f"{item['round']}\t{item['num_replies']}\t"
f"{item['max_abs_diff']:.8e}\t{int(item['passed'])}"
)

if failed:
print(f"Verification failed in {len(failed)} rounds.")
return 2

max_diff = max(float(item["max_abs_diff"]) for item in verification_rounds)
print(f"All rounds verified. Max observed absolute difference: {max_diff:.8e}")
return 0


if __name__ == "__main__":
raise SystemExit(main())
144 changes: 144 additions & 0 deletions baselines/verifiableagg/pyproject.toml
@@ -0,0 +1,144 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "verifiableagg"
version = "1.0.0"
description = "Verifiable Aggregation Workflow baseline using Flower Message API"
license = "Apache-2.0"
dependencies = [
"flwr[simulation]>=1.24.0",
"numpy>=1.26.0",
"torch==2.8.0",
]

[tool.hatch.metadata]
allow-direct-references = true

[project.optional-dependencies]
dev = [
"isort==5.13.2",
"black==24.2.0",
"docformatter==1.7.5",
"mypy==1.8.0",
"pylint==3.3.1",
"pytest==7.4.4",
"pytest-watch==4.2.0",
"ruff==0.4.5",
"types-requests==2.31.0.20240125",
]

[tool.isort]
profile = "black"

[tool.black]
line-length = 88
target-version = ["py310", "py311", "py312"]

[tool.pytest.ini_options]
minversion = "6.2"
addopts = "-qq"

[tool.mypy]
ignore_missing_imports = true
strict = false
plugins = "numpy.typing.mypy_plugin"

[tool.pylint."MESSAGES CONTROL"]
disable = "duplicate-code,too-few-public-methods,useless-import-alias"
good-names = "i,j,k,_,x,y,X,Y,K,N"
max-args = 10
max-attributes = 15
max-locals = 36
max-branches = 20
max-statements = 55

[tool.pylint.typecheck]
generated-members = "numpy.*, torch.*, tensorflow.*"

[[tool.mypy.overrides]]
module = [
"importlib.metadata.*",
"importlib_metadata.*",
]
follow_imports = "skip"
follow_imports_for_stubs = true
disallow_untyped_calls = false

[[tool.mypy.overrides]]
module = "torch.*"
follow_imports = "skip"
follow_imports_for_stubs = true

[tool.docformatter]
wrap-summaries = 88
wrap-descriptions = 88

[tool.ruff]
target-version = "py310"
line-length = 88
exclude = [
".bzr",
".direnv",
".eggs",
".git",
".hg",
".mypy_cache",
".nox",
".pants.d",
".pytype",
".ruff_cache",
".svn",
".tox",
".venv",
"__pypackages__",
"_build",
"buck-out",
"build",
"dist",
"node_modules",
"venv",
"proto",
]

[tool.ruff.lint]
select = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"]
fixable = ["D", "E", "F", "W", "B", "ISC", "C4", "UP"]
ignore = ["B024", "B027", "D205", "D209"]

[tool.ruff.lint.pydocstyle]
convention = "numpy"

[tool.hatch.build.targets.wheel]
packages = ["."]

[tool.flwr.app]
publisher = "rwilliamspbg-ops"

[tool.flwr.app.components]
serverapp = "verifiableagg.server_app:app"
clientapp = "verifiableagg.client_app:app"

[tool.flwr.app.config]
num-server-rounds = 5
fraction-train = 1.0
fraction-evaluate = 1.0
local-epochs = 1
learning-rate = 0.05
batch-size = 32
num-features = 10
num-train-examples = 128
num-val-examples = 64
random-seed = 2026
verify-aggregation = true
verification-tolerance = 1e-6
artifacts-dir = "artifacts"

[tool.flwr.federations]
default = "local-simulation"

[tool.flwr.federations.local-simulation]
options.num-supernodes = 8
options.backend.client-resources.num-cpus = 1
options.backend.client-resources.num-gpus = 0.0
5 changes: 5 additions & 0 deletions baselines/verifiableagg/run_benchmark.sh
@@ -0,0 +1,5 @@
#!/usr/bin/env bash
set -euo pipefail

flwr run . --run-config "num-server-rounds=5 random-seed=2026 verify-aggregation=true"
python benchmark_report.py --report-path artifacts/report.json
1 change: 1 addition & 0 deletions baselines/verifiableagg/verifiableagg/__init__.py
@@ -0,0 +1 @@
"""Verifiableagg baseline package."""