Distributed shard servers (end-to-end)

ATOM00blue · ATOM00blue · commit 9c84c5e9b29a · 2026-05-31T14:09:36.000+05:30
- wolfdb.server: ThreadingHTTPServer wrapping a Memory; JSON remember/recall/forget/snapshot/compact/gc/inspect; per-server lock; optional bearer-token auth; wolf-server CLI
- wolfdb.cluster.DistributedMemory: stdlib client, subject-routed writes, parallel fan-out recall merged top-k -> recall escapes the GIL across processes
- test_cluster.py: 5 real-HTTP e2e tests (routing, supersession, fan-out, forget, auth)
- benchmarks/distributed.py spawns real subprocess servers; README distributed section
- 74 tests, ruff+mypy clean
diff --git a/README.md b/README.md
@@ -156,13 +156,32 @@ mem.remember("Alice works at Acme", subject="Alice", predicate="works_at", objec
 mem.recall("where does Alice work?", k=5)         # fans out to all shards, merges top-k
 ```
 
+  `ShardedMemory` parallelizes writes in-process; for parallel **recall** (escaping the GIL),
+  run each shard as its own process with the bundled server and route with `DistributedMemory`:
+
+```bash
+wolf-server ./shard0 --port 8100      # one process per shard (set WOLF_TOKEN to require auth)
+wolf-server ./shard1 --port 8101
+```
+
+```python
+from wolfdb import DistributedMemory
+
+mem = DistributedMemory(["http://127.0.0.1:8100", "http://127.0.0.1:8101"])
+mem.remember("Bob likes tea", subject="Bob", predicate="likes", object="tea")
+mem.recall("tea", k=5)                 # fan-out over the network; shards score in parallel
+```
+
+> The server binds to localhost and is unauthenticated unless you set a token
+> (`WOLF_TOKEN` env or `serve(token=...)`). Put it behind TLS + auth before exposing it.
+
 ## Repository layout
 
 ```
 docs/research/             competitive teardown + domain mastery
 docs/problem-statement.md  the precise gap WolfDB fills
 docs/design/               data model, storage, write path, retrieval, API
-src/wolfdb/                engine, storage (local + S3), index, embedders, scoring, cli
+src/wolfdb/                engine, storage (local + S3), index, embedders, scoring, shard, server, cluster, cli
 tests/                     unit + property (hypothesis) + concurrency + S3 tests
 benchmarks/                micro-benchmarks
 examples/                  runnable quickstart
@@ -171,10 +190,10 @@ examples/                  runnable quickstart
 ## Roadmap
 
 Reference implementation is Python. Shipped: object-storage backends, vectorized recall, an
-inverted index so hybrid/keyword recall scales sublinearly, size-tiered auto-compaction, and
-horizontal sharding. Next: a Rust production engine, a local disk cache in front of object
-storage, distributed (cross-process) shard servers, and HNSW-on-object-storage for
-billion-scale recall.
+inverted index so hybrid/keyword recall scales sublinearly, size-tiered auto-compaction,
+horizontal sharding, and distributed shard servers (one process per shard, fan-out recall).
+Next: a Rust production engine, a local disk cache in front of object storage, and
+HNSW-on-object-storage for billion-scale recall.
 
 ## License
 
diff --git a/benchmarks/distributed.py b/benchmarks/distributed.py
@@ -0,0 +1,70 @@
+"""Distributed benchmark: spawn K real shard-server processes and measure fan-out.
+
+Unlike in-process ShardedMemory (GIL-bound), each shard here is its own OS process,
+so recall fan-out runs truly in parallel. Run: python benchmarks/distributed.py [N] [K]
+"""
+import os
+import statistics
+import subprocess
+import sys
+import tempfile
+import time
+import urllib.error
+import urllib.request
+from concurrent.futures import ThreadPoolExecutor
+
+from wolfdb import DistributedMemory
+
+
+def _wait_ready(url: str, timeout: float = 15.0) -> None:
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            urllib.request.urlopen(url + "/inspect", timeout=1.0).read()
+            return
+        except (urllib.error.URLError, ConnectionError):
+            time.sleep(0.1)
+    raise RuntimeError(f"server {url} did not start")
+
+
+def main(n: int, k: int) -> None:
+    root = tempfile.mkdtemp()
+    procs, urls = [], []
+    for i in range(k):
+        port = 8100 + i
+        procs.append(subprocess.Popen(
+            [sys.executable, "-m", "wolfdb.server", os.path.join(root, f"s{i}"),
+             "--port", str(port)], stdout=subprocess.DEVNULL))
+        urls.append(f"http://127.0.0.1:{port}")
+    try:
+        for u in urls:
+            _wait_ready(u)
+        client = DistributedMemory(urls)
+
+        t0 = time.perf_counter()
+        with ThreadPoolExecutor(max_workers=k * 4) as ex:
+            list(ex.map(lambda i: client.remember(
+                f"user {i} prefers product {i} in region {i % 13}",
+                subject=f"user{i}", predicate="pref", object=f"product{i}"), range(n)))
+        wdt = time.perf_counter() - t0
+        print(f"N={n} across {k} server processes")
+        print(f"write:         {n / wdt:,.0f} facts/s ({wdt:.1f}s)")
+        print(f"believed:      {client.inspect()['facts_believed']}")
+
+        for mode in ("vector", "hybrid"):
+            lat = []
+            for i in range(200):
+                s = time.perf_counter()
+                client.recall(f"product {i * 7}", k=10, mode=mode)
+                lat.append((time.perf_counter() - s) * 1000)
+            lat.sort()
+            print(f"recall {mode:7}: p50={statistics.median(lat):.2f}ms  "
+                  f"p95={lat[int(len(lat) * 0.95)]:.2f}ms")
+    finally:
+        for p in procs:
+            p.terminate()
+
+
+if __name__ == "__main__":
+    args = [a for a in sys.argv[1:] if not a.startswith("--")]
+    main(int(args[0]) if args else 4000, int(args[1]) if len(args) > 1 else 4)
diff --git a/pyproject.toml b/pyproject.toml
@@ -44,6 +44,7 @@ dev = [
 [project.scripts]
 wolf = "wolfdb.cli:main"
 wolfdb = "wolfdb.cli:main"
+wolf-server = "wolfdb.server:main"
 
 [tool.setuptools.packages.find]
 where = ["src"]
diff --git a/src/wolfdb/__init__.py b/src/wolfdb/__init__.py
@@ -1,4 +1,5 @@
 """WolfDB: open-source temporal memory database for AI agents."""
+from .cluster import DistributedMemory
 from .embedders import OpenAIEmbedder, SentenceTransformerEmbedder
 from .engine import Hit, Memory
 from .errors import Config, ConflictError, ValidationError, WolfError
@@ -15,5 +16,5 @@
     "Embedder", "Extractor", "HashingEmbedder", "IdentityExtractor",
     "OpenAIEmbedder", "SentenceTransformerEmbedder",
     "StorageBackend", "LocalBackend", "S3Backend", "Log", "Conflict",
-    "VectorIndex", "cosine_scores", "ShardedMemory",
+    "VectorIndex", "cosine_scores", "ShardedMemory", "DistributedMemory",
 ]
diff --git a/src/wolfdb/cluster.py b/src/wolfdb/cluster.py
@@ -0,0 +1,85 @@
+"""Distributed client: fan out across WolfDB shard servers (separate processes).
+
+Recall fans out over the network, so each shard's CPU work runs in its own process
+— escaping the GIL that limits in-process ``ShardedMemory``. Writes route by subject
+(keeping functional-edge supersession within a shard). Uses only the standard library.
+"""
+from __future__ import annotations
+
+import hashlib
+import json
+import urllib.request
+from concurrent.futures import ThreadPoolExecutor
+
+from .engine import Hit
+from .errors import ValidationError
+from .models import Fact
+
+
+def _route(key: str, n: int) -> int:
+    return int.from_bytes(hashlib.blake2b(key.encode("utf-8"), digest_size=8).digest(), "big") % n
+
+
+class DistributedMemory:
+    def __init__(self, urls: list[str], *, token: str | None = None, timeout: float = 30.0):
+        if not urls:
+            raise ValidationError("need at least one shard url")
+        self.urls = [u.rstrip("/") for u in urls]
+        self._token = token
+        self._timeout = timeout
+        self._pool = ThreadPoolExecutor(max_workers=len(self.urls))
+
+    # ---- transport ------------------------------------------------------
+    def _headers(self) -> dict:
+        h = {"Content-Type": "application/json"}
+        if self._token:
+            h["Authorization"] = f"Bearer {self._token}"
+        return h
+
+    def _call(self, url: str, path: str, payload: dict | None) -> dict:
+        data = None if payload is None else json.dumps(payload).encode("utf-8")
+        req = urllib.request.Request(url + path, data=data, headers=self._headers(),
+                                     method="POST" if data is not None else "GET")
+        with urllib.request.urlopen(req, timeout=self._timeout) as r:
+            return json.loads(r.read())
+
+    def _fanout(self, path: str, payload: dict | None) -> list[dict]:
+        return list(self._pool.map(lambda u: self._call(u, path, payload), self.urls))
+
+    # ---- API ------------------------------------------------------------
+    def remember(self, text: str = "", *, subject: str | None = None,
+                 partition_key: str | None = None, **kw) -> list[str]:
+        i = _route(str(partition_key or subject or text or ""), len(self.urls))
+        return self._call(self.urls[i], "/remember", {"text": text, "subject": subject, **kw})["ids"]
+
+    def recall(self, query: str, *, k: int = 10, **kw) -> list[Hit]:
+        payload = {"query": query, "k": k, **kw}
+        hits: list[Hit] = []
+        for part in self._fanout("/recall", payload):
+            hits.extend(Hit(fact=Fact.from_dict(h["fact"]), score=h["score"],
+                            components=h["components"]) for h in part["hits"])
+        hits.sort(key=lambda h: h.score, reverse=True)
+        return hits[:k]
+
+    def forget(self, fact_id: str) -> None:
+        self._fanout("/forget", {"fact_id": fact_id})  # broadcast; no-op where absent
+
+    def snapshot(self, **kw) -> list[Fact]:
+        return [Fact.from_dict(f) for part in self._fanout("/snapshot", kw) for f in part["facts"]]
+
+    def compact(self, **kw) -> int:
+        return sum(part["n"] for part in self._fanout("/compact", kw))
+
+    def gc(self) -> int:
+        return sum(part["n"] for part in self._fanout("/gc", {}))
+
+    def inspect(self) -> dict:
+        infos = self._fanout("/inspect", None)
+        agg: dict = {"shards": len(self.urls)}
+        for key in ("facts_total", "facts_believed", "entities", "edges_believed",
+                    "events", "segments"):
+            agg[key] = sum(i.get(key, 0) for i in infos)
+        return agg
+
+    def close(self) -> None:
+        self._pool.shutdown(wait=False)
diff --git a/src/wolfdb/server.py b/src/wolfdb/server.py
@@ -0,0 +1,106 @@
+"""HTTP shard server: exposes one Memory over JSON so shards run as separate
+processes (recall fan-out then escapes the GIL).
+
+Security: binds to 127.0.0.1 by default and has NO authentication unless a token
+is configured (``serve(token=...)`` or the ``WOLF_TOKEN`` env var). Do not expose
+to an untrusted network without a token and TLS termination in front.
+"""
+from __future__ import annotations
+
+import json
+import os
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+from .engine import Memory
+
+
+def _handler(memory: Memory, token: str | None):
+    lock = threading.Lock()  # Memory is not internally thread-safe; serialize per server
+
+    class Handler(BaseHTTPRequestHandler):
+        def _auth_ok(self) -> bool:
+            return not token or self.headers.get("Authorization") == f"Bearer {token}"
+
+        def _send(self, code: int, obj) -> None:
+            body = json.dumps(obj).encode("utf-8")
+            self.send_response(code)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(body)))
+            self.end_headers()
+            self.wfile.write(body)
+
+        def _body(self) -> dict:
+            n = int(self.headers.get("Content-Length", 0))
+            return json.loads(self.rfile.read(n) or b"{}")
+
+        def log_message(self, *_a):  # quiet
+            pass
+
+        def do_GET(self):
+            if not self._auth_ok():
+                return self._send(401, {"error": "unauthorized"})
+            if self.path == "/inspect":
+                with lock:
+                    return self._send(200, memory.inspect())
+            self._send(404, {"error": "not found"})
+
+        def do_POST(self):
+            if not self._auth_ok():
+                return self._send(401, {"error": "unauthorized"})
+            try:
+                p = self._body()
+                with lock:
+                    self._dispatch(p)
+            except Exception as exc:  # noqa: BLE001
+                self._send(400, {"error": str(exc)})
+
+        def _dispatch(self, p: dict) -> None:
+            if self.path == "/remember":
+                self._send(200, {"ids": memory.remember(p.pop("text", ""), **p)})
+            elif self.path == "/recall":
+                hits = memory.recall(p.pop("query", ""), **p)
+                self._send(200, {"hits": [{"fact": h.fact.to_dict(), "score": h.score,
+                                           "components": h.components} for h in hits]})
+            elif self.path == "/forget":
+                memory.forget(p["fact_id"])
+                self._send(200, {"ok": True})
+            elif self.path == "/snapshot":
+                self._send(200, {"facts": [f.to_dict() for f in memory.snapshot(**p)]})
+            elif self.path == "/compact":
+                self._send(200, {"n": memory.compact(**p)})
+            elif self.path == "/gc":
+                self._send(200, {"n": memory.gc()})
+            else:
+                self._send(404, {"error": "not found"})
+
+    return Handler
+
+
+def serve(memory: Memory, host: str = "127.0.0.1", port: int = 8080,
+          token: str | None = None) -> ThreadingHTTPServer:
+    """Build a ThreadingHTTPServer for `memory`. Call serve_forever() to run it."""
+    return ThreadingHTTPServer((host, port), _handler(memory, token))
+
+
+def main(argv=None) -> int:
+    import argparse
+    ap = argparse.ArgumentParser(prog="wolf-server", description="WolfDB shard server")
+    ap.add_argument("path", help="path to this shard's .wolf store")
+    ap.add_argument("--host", default="127.0.0.1")
+    ap.add_argument("--port", type=int, default=8080)
+    ap.add_argument("--namespace", default="wolf")
+    a = ap.parse_args(argv)
+    token = os.environ.get("WOLF_TOKEN")
+    httpd = serve(Memory.open(a.path, namespace=a.namespace), a.host, a.port, token)
+    note = "" if token else "  [NO AUTH — bind to localhost only]"
+    print(f"WolfDB shard serving {a.path} at http://{a.host}:{a.port}{note}", flush=True)
+    try:
+        httpd.serve_forever()
+    except KeyboardInterrupt:
+        httpd.shutdown()
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/tests/test_cluster.py b/tests/test_cluster.py