Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ Configure once with `metaclaw setup`, then `metaclaw start` brings up the proxy,
| Mode | Default | What it does |
|------|---------|--------------|
| `skills_only` | | Proxy your LLM API. Skills injected and auto-summarized after each session. No GPU/Tinker required. |
| `verified_skills` | | Same as `skills_only`, but skill promotion is gated by a pluggable verifier (local by default, no network required). |
| `rl` | | Skills + RL training (GRPO). Trains immediately when a batch is full. Optional OPD for teacher distillation. |
| `auto` | ✅ | Skills + RL + smart scheduler. RL weight updates only run during sleep/idle/meeting windows. |

Expand Down Expand Up @@ -285,7 +286,7 @@ When you start MetaClaw, the command waits until the local proxy becomes healthy
<summary><b>Full config reference (click to expand)</b></summary>

```yaml
mode: auto # "auto" | "rl" | "skills_only"
mode: auto # "auto" | "rl" | "skills_only" | "verified_skills"
claw_type: openclaw # "openclaw" | "copaw" | "ironclaw" | "picoclaw" | "zeroclaw" | "nanoclaw" | "nemoclaw" | "hermes" | "none"

llm:
Expand Down Expand Up @@ -574,3 +575,20 @@ MetaClaw builds on top of the following open-source projects:
## 📄 License

This project is licensed under the [MIT License](LICENSE).


### Verified skills (optional)

`verified_skills` mode adds a verifier gate that runs before a generated skill is persisted. To enable it with the default local verifier (no network required), use:

```yaml
mode: verified_skills
verification:
  enabled: true
  verifier: local
  require_pass_for_promotion: true
  allow_indeterminate_promotion: false
  redact_inputs: true
```

Remote verifiers are optional (`remote_http` and `settlement_witness`). SettlementWitness is supported as an adapter only and is **not required**.
58 changes: 58 additions & 0 deletions metaclaw/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import re
import threading
import time
from datetime import datetime
from itertools import count
from typing import Any, Optional

Expand All @@ -34,6 +35,8 @@
from openai import OpenAI

from .config import MetaClawConfig
from .verification import build_verifier_safe
from .verification.base import VerificationResult, VerificationVerdict
from .data_formatter import ConversationSample
from .memory.scope import base_scope, derive_memory_scope
from .prm_scorer import PRMScorer
Expand Down Expand Up @@ -516,6 +519,7 @@ def __init__(
self.prm_scorer = prm_scorer
self.skill_evolver = skill_evolver
self.memory_manager = memory_manager
self._verifier = build_verifier_safe(config, logger=logger)
# Optional LastRequestTracker for scheduler idle detection
self._last_request_tracker = last_request_tracker

Expand Down Expand Up @@ -2141,9 +2145,63 @@ async def _evolve_skills_for_session(self, turns: list[dict]):
added = 0
for skill in new_skills:
category = skill.get("category", "general")
if self.config.mode == "verified_skills":
task_id, spec, output, context = self._build_verification_packet(skill)
vr = self._verifier.verify(task_id=task_id, spec=spec, output=output, context=context)
skill.setdefault("metadata", {})["promotion_source"] = "verified_skills"
skill["metadata"]["verification"] = self._verification_metadata(vr)
if (
vr.verifier_id == "null"
and vr.reason_code == "VERIFIER_CONFIG_ERROR"
and self.config.verification.require_pass_for_promotion
and not self.config.verification.allow_fallback_promotion_on_config_error
):
vr = VerificationResult(
verdict=VerificationVerdict.INDETERMINATE,
verifier_id="null",
reason_code="VERIFIER_CONFIG_ERROR",
message="Verifier configuration failed.",
)
if self.config.verification.require_pass_for_promotion and vr.verdict != VerificationVerdict.PASS:
bucket = "pending_review" if vr.verdict == VerificationVerdict.INDETERMINATE and not self.config.verification.allow_indeterminate_promotion else "rejected"
self._audit_skill_decision(bucket, skill, vr)
continue
added += self.skill_manager.add_skills([skill], category=category)
logger.info("[SkillEvolver] session analysis added %d new skills", added)


def _build_verification_packet(self, skill: dict) -> tuple[str, dict, dict, dict]:
from datetime import datetime
ts = datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
task_id = f"skill-promotion-{skill.get('name','unknown')}-{ts}"
spec = {"verification_type": "skill_promotion", "skill_name": skill.get("name", ""), "criteria": ["non_empty", "has_evaluation"]}
output = {
"skill_name": skill.get("name", ""),
"skill_content": skill.get("content", ""),
"evaluation_criteria": ["generated_from_session"],
"evaluation_result": {"passed": True, "criteria_satisfied": ["generated_from_session"]},
}
existing_contents = []
if self.skill_manager:
for group in self.skill_manager.skills.values():
if isinstance(group, list):
existing_contents.extend([x.get("content", "") for x in group if isinstance(x, dict)])
elif isinstance(group, dict):
for arr in group.values():
existing_contents.extend([x.get("content", "") for x in arr if isinstance(x, dict)])
context = {"source": "skills_only_session_analysis", "existing_skill_contents": existing_contents, "redacted": True}
return task_id, spec, output, context

def _verification_metadata(self, vr: VerificationResult) -> dict:
    """Flatten a verification result into the metadata dict stored on a skill.

    ``verified`` is True only for a PASS verdict whose ``verifier_id`` is not
    ``"null"``.
    """
    is_verified = vr.verdict == VerificationVerdict.PASS and vr.verifier_id != "null"
    metadata = {
        "verified": is_verified,
        "verifier_id": vr.verifier_id,
        "verdict": vr.verdict.value,
        "confidence": vr.confidence,
        "reason_code": vr.reason_code,
        "receipt_id": vr.receipt_id,
        "verifier_kid": vr.verifier_kid,
    }
    return metadata

def _audit_skill_decision(self, bucket: str, skill: dict, vr: VerificationResult) -> None:
base = os.path.join(self.config.skills_dir, bucket)
os.makedirs(base, exist_ok=True)
path = os.path.join(base, f"{skill.get('name','unknown')}.json")
with open(path, "w", encoding="utf-8") as f:
json.dump({"skill_name": skill.get("name"), "timestamp": datetime.utcnow().isoformat() + "Z", "promotion_source": self.config.mode, "verifier_id": vr.verifier_id, "verdict": vr.verdict.value, "reason_code": vr.reason_code, "message": vr.message, "receipt_id": vr.receipt_id}, f)

# ------------------------------------------------------------------ #
# Skill injection #
# ------------------------------------------------------------------ #
Expand Down
2 changes: 1 addition & 1 deletion metaclaw/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def setup():
@metaclaw.command()
@click.option(
"--mode",
type=click.Choice(["auto", "skills_only", "rl"]),
type=click.Choice(["auto", "skills_only", "verified_skills", "rl"]),
default=None,
help="Override operating mode for this session.",
)
Expand Down
16 changes: 16 additions & 0 deletions metaclaw/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@
"""

from dataclasses import dataclass, field
@dataclass
class VerificationConfig:
    """Settings for the skill-promotion verifier used in ``verified_skills`` mode."""

    # When False, the verifier factory returns a NullVerifier.
    enabled: bool = False
    # Verifier selector: "local", "remote_http", "settlement_witness" or "null".
    verifier: str = "local"
    # When True, a skill whose verdict is not PASS is not promoted.
    require_pass_for_promotion: bool = True
    # Consulted when choosing the audit bucket for a non-passing verdict.
    allow_indeterminate_promotion: bool = False
    # NOTE(review): no reader of the next three fields is visible in this
    # change — confirm where they take effect.
    rollback_threshold: int = 3
    store_receipts: bool = True
    redact_inputs: bool = True
    # URL the remote HTTP verifier posts to.
    endpoint: str = ""
    # NOTE(review): presumably an identity for remote verifiers — confirm.
    agent_id: str = ""
    # Request timeout handed to the remote HTTP verifier.
    timeout_seconds: int = 10
    # Consulted when the verifier could not be configured and the
    # NullVerifier fallback was used.
    allow_fallback_promotion_on_config_error: bool = False


@dataclass
Expand Down Expand Up @@ -130,6 +143,7 @@ class MetaClawConfig:
# "auto" — v0.3: RL + scheduler (trains during idle/sleep windows)
# "rl" — v0.2: RL without scheduler (trains immediately on full batch)
# "skills_only" — proxy + skill injection only (no Tinker, no RL)
# "verified_skills" — like skills_only, but gate promotion via verifier
mode: str = "auto"
# When True (RL/auto mode only), the trainer does NOT run its own
# collection loop. Instead it waits for ``metaclaw train-step`` CLI
Expand Down Expand Up @@ -176,3 +190,5 @@ class MetaClawConfig:
# WeChat (official openclaw-weixin plugin, auto-installed) #
# ------------------------------------------------------------------ #
wechat_enabled: bool = False

verification: VerificationConfig = field(default_factory=VerificationConfig)
16 changes: 16 additions & 0 deletions metaclaw/config_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,19 @@
"wechat": {
"enabled": False,
},
"verification": {
"enabled": False,
"verifier": "local",
"require_pass_for_promotion": True,
"allow_indeterminate_promotion": False,
"rollback_threshold": 3,
"store_receipts": True,
"redact_inputs": True,
"endpoint": "",
"agent_id": "",
"timeout_seconds": 10,
"allow_fallback_promotion_on_config_error": False,
},
}


Expand Down Expand Up @@ -185,6 +198,7 @@ def to_metaclaw_config(self) -> MetaClawConfig:
sched = data.get("scheduler", {})
sched_cal = sched.get("calendar", {})
wx = data.get("wechat", {})
verification = data.get("verification", {})
mode = data.get("mode", "auto")
rl_enabled = mode in ("rl", "auto") or bool(rl.get("enabled", False))

Expand Down Expand Up @@ -236,6 +250,7 @@ def to_metaclaw_config(self) -> MetaClawConfig:
except Exception:
pass

from .config import VerificationConfig
return MetaClawConfig(
# Mode
mode=mode,
Expand Down Expand Up @@ -329,6 +344,7 @@ def describe(self) -> str:
rl = data.get("rl", {})
memory = data.get("memory", {})
wx = data.get("wechat", {})
verification = data.get("verification", {})
mode = data.get("mode", "auto")
lines = [
f"mode: {mode}",
Expand Down
4 changes: 2 additions & 2 deletions metaclaw/launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def start(self):
if mode == "auto":
cfg.scheduler_enabled = True
await self._start_rl(cfg)
elif mode == "skills_only":
elif mode in ("skills_only", "verified_skills"):
await self._start_skills_only(cfg)
else:
await self._start_rl(cfg)
Expand Down Expand Up @@ -341,7 +341,7 @@ def _setup_evolver_env(self, cfg):
pre-existing OPENAI_* env vars (force-assign). In other modes the
existing env vars win (setdefault), preserving previous behaviour.
"""
force = cfg.mode == "skills_only"
force = cfg.mode in ("skills_only", "verified_skills")
_set = (lambda k, v: os.environ.__setitem__(k, v)) if force else os.environ.setdefault
if cfg.evolver_api_base:
_set("OPENAI_BASE_URL", cfg.evolver_api_base)
Expand Down
2 changes: 1 addition & 1 deletion metaclaw/setup_wizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def run(self):
current_mode = existing.get("mode", "auto")
mode = _prompt_choice(
"Operating mode",
["auto", "skills_only", "rl"],
["auto", "skills_only", "verified_skills", "rl"],
default=current_mode,
)

Expand Down
20 changes: 14 additions & 6 deletions metaclaw/skill_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import glob
import logging
import os
import json
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -127,12 +128,15 @@ def _parse_skill_md(path: str) -> Optional[Dict[str, Any]]:
logger.warning("[SkillManager] skipping %s — missing name or description", path)
return None

return {
"name": name,
"description": description,
"category": category,
"content": body,
}
result = {"name": name, "description": description, "category": category, "content": body}
meta_path = os.path.join(os.path.dirname(path), "metadata.json")
if os.path.exists(meta_path):
try:
with open(meta_path, encoding="utf-8") as mf:
result["metadata"] = json.load(mf)
except Exception:
pass
return result


# ------------------------------------------------------------------ #
Expand Down Expand Up @@ -495,6 +499,10 @@ def _write_skill_md(self, skill: dict) -> None:
with open(path, "w", encoding="utf-8") as f:
f.write(text)
logger.info("[SkillManager] wrote skill file: %s", path)
metadata = skill.get("metadata")
if isinstance(metadata, dict):
with open(os.path.join(skill_dir, "metadata.json"), "w", encoding="utf-8") as mf:
json.dump(metadata, mf, ensure_ascii=False, indent=2)
except OSError as e:
logger.warning("[SkillManager] could not write %s: %s", path, e)

Expand Down
2 changes: 2 additions & 0 deletions metaclaw/verification/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .base import VerificationResult, VerificationVerdict, VerifierInterface
from .factory import build_verifier, build_verifier_safe
27 changes: 27 additions & 0 deletions metaclaw/verification/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, Optional

class VerificationVerdict(str, Enum):
    """Outcome of a verification run; members are also plain strings."""

    # The candidate passed verification.
    PASS = "PASS"
    # The candidate failed verification.
    FAIL = "FAIL"
    # The verifier could not reach a decision.
    INDETERMINATE = "INDETERMINATE"

@dataclass
class VerificationResult:
    """Value object describing what a verifier decided and why."""

    # The decision itself; the only field without a default.
    verdict: VerificationVerdict
    confidence: float = 0.0
    # Identifier of the verifier that produced this result.
    verifier_id: str = "unknown"
    # Short machine-readable code and free-text explanation, both optional.
    reason_code: Optional[str] = None
    message: Optional[str] = None
    # Optional receipt details supplied by a verifier.
    receipt_id: Optional[str] = None
    verifier_kid: Optional[str] = None
    receipt: Optional[Dict[str, Any]] = None
    # A fresh dict per instance, never shared between results.
    evidence: Dict[str, Any] = field(default_factory=dict)

class VerifierInterface(ABC):
    """Abstract base class every verifier implements."""

    @abstractmethod
    def verify(
        self,
        *,
        task_id: str,
        spec: Dict[str, Any],
        output: Dict[str, Any],
        context: Optional[Dict[str, Any]] = None,
    ) -> VerificationResult:
        """Judge ``output`` against ``spec`` and return a VerificationResult.

        All arguments are keyword-only. This base implementation always
        raises NotImplementedError.
        """
        raise NotImplementedError
29 changes: 29 additions & 0 deletions metaclaw/verification/factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import logging
from .local_verifier import LocalVerifier
from .null_verifier import NullVerifier
from .remote_http_verifier import RemoteHTTPVerifier
from .settlement_witness import SettlementWitnessVerifier


def build_verifier(config):
    """Create the verifier selected by ``config.verification``.

    Returns a NullVerifier when verification is disabled; otherwise the
    verifier named by ``verifier``.

    Raises:
        ValueError: If the configured verifier name is not recognised.
    """
    settings = config.verification
    if not settings.enabled:
        return NullVerifier()

    kind = settings.verifier
    # Checked in order; only the remote variants receive the settings object.
    factories = (
        ("local", LocalVerifier),
        ("remote_http", lambda: RemoteHTTPVerifier(settings)),
        ("settlement_witness", lambda: SettlementWitnessVerifier(settings)),
        ("null", NullVerifier),
    )
    for label, make in factories:
        if kind == label:
            return make()
    raise ValueError(f"Unknown verifier: {kind}")


def build_verifier_safe(config, logger=None):
    """Build the configured verifier without raising.

    Any exception from ``build_verifier`` is logged as a warning and replaced
    by a NullVerifier tagged with reason code ``VERIFIER_CONFIG_ERROR``.
    """
    log = logger if logger else logging.getLogger(__name__)
    try:
        verifier = build_verifier(config)
    except Exception as exc:
        log.warning("Verifier configuration failed; falling back to NullVerifier: %s", exc)
        return NullVerifier(reason_code="VERIFIER_CONFIG_ERROR")
    return verifier
18 changes: 18 additions & 0 deletions metaclaw/verification/local_verifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from .base import VerificationResult, VerificationVerdict, VerifierInterface

class LocalVerifier(VerifierInterface):
    """Verifier that runs structural checks on a candidate skill in-process."""

    def verify(self, *, task_id, spec, output, context=None):
        """Run the local checks and return the first failing verdict, else PASS.

        Checks, in order: non-empty name and content (``MALFORMED_SKILL``),
        evaluation data present (``MISSING_EVAL``), content not already
        stored (``DUPLICATE_SKILL_CONTENT``), at least one satisfied
        criterion (otherwise INDETERMINATE with
        ``INSUFFICIENT_IMPROVEMENT_EVIDENCE``).
        """
        context = context or {}
        skill_name = (output.get("skill_name") or "").strip()
        content = (output.get("skill_content") or "").strip()
        if not skill_name or not content:
            return VerificationResult(VerificationVerdict.FAIL, verifier_id="local", reason_code="MALFORMED_SKILL")

        evaluation = output.get("evaluation_result")
        if not output.get("evaluation_criteria") or evaluation is None:
            return VerificationResult(VerificationVerdict.FAIL, verifier_id="local", reason_code="MISSING_EVAL")

        # The candidate content is stripped above, so strip the stored
        # contents too; otherwise a trailing newline hides a duplicate.
        known_contents = {
            existing.strip()
            for existing in (context.get("existing_skill_contents") or [])
            if isinstance(existing, str)
        }
        if content in known_contents:
            return VerificationResult(VerificationVerdict.FAIL, verifier_id="local", reason_code="DUPLICATE_SKILL_CONTENT")

        # A non-dict evaluation_result carries no criteria: report it as
        # indeterminate rather than raising AttributeError.
        satisfied = evaluation.get("criteria_satisfied") if isinstance(evaluation, dict) else None
        if not satisfied:
            return VerificationResult(VerificationVerdict.INDETERMINATE, verifier_id="local", reason_code="INSUFFICIENT_IMPROVEMENT_EVIDENCE")
        return VerificationResult(VerificationVerdict.PASS, verifier_id="local", confidence=0.72, reason_code="LOCAL_CHECKS_PASSED")
8 changes: 8 additions & 0 deletions metaclaw/verification/null_verifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from .base import VerificationResult, VerificationVerdict, VerifierInterface

class NullVerifier(VerifierInterface):
    """Verifier that performs no checks and always reports PASS as ``"null"``."""

    def __init__(self, reason_code: str | None = None):
        # Echoed back on every result produced by this instance.
        self.reason_code = reason_code

    def verify(self, *, task_id, spec, output, context=None):
        """Return a PASS result carrying this instance's reason code; inputs are ignored."""
        result = VerificationResult(
            verdict=VerificationVerdict.PASS,
            verifier_id="null",
            reason_code=self.reason_code,
        )
        return result
15 changes: 15 additions & 0 deletions metaclaw/verification/remote_http_verifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from __future__ import annotations
import requests
from .base import VerificationResult, VerificationVerdict, VerifierInterface

class RemoteHTTPVerifier(VerifierInterface):
    """Verifier that delegates the decision to an HTTP endpoint."""

    def __init__(self, config):
        """Read ``endpoint`` and ``timeout_seconds`` from the verification settings."""
        self.endpoint = config.endpoint
        self.timeout = config.timeout_seconds

    def verify(self, *, task_id, spec, output, context=None):
        """POST the verification packet as JSON and translate the reply.

        Never raises: any transport error, HTTP error status, or malformed
        reply is reported as INDETERMINATE with reason code
        ``VERIFIER_UNAVAILABLE``.
        """
        payload = {"task_id": task_id, "spec": spec, "output": output, "context": context or {}}
        try:
            response = requests.post(self.endpoint, json=payload, timeout=self.timeout)
            # An error status must not be interpreted as a verdict.
            response.raise_for_status()
            data = response.json()
            if not isinstance(data, dict):
                raise ValueError("verifier response is not a JSON object")
            # ``or`` fallbacks also cover explicit JSON nulls, which
            # dict.get's default does not; a null confidence would
            # otherwise discard an otherwise valid verdict.
            return VerificationResult(
                VerificationVerdict(data.get("verdict") or "INDETERMINATE"),
                confidence=float(data.get("confidence") or 0.0),
                verifier_id=data.get("verifier_id") or "remote_http",
                reason_code=data.get("reason_code"),
                message=data.get("message"),
                receipt_id=data.get("receipt_id"),
                verifier_kid=data.get("verifier_kid"),
                receipt=data.get("receipt"),
                evidence=data.get("evidence") or {},
            )
        except Exception:
            # Deliberately broad: verification is best-effort and must not
            # propagate errors to the caller.
            return VerificationResult(
                VerificationVerdict.INDETERMINATE,
                verifier_id="remote_http",
                reason_code="VERIFIER_UNAVAILABLE",
                message="Remote verifier unavailable or timed out.",
            )
5 changes: 5 additions & 0 deletions metaclaw/verification/settlement_witness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .remote_http_verifier import RemoteHTTPVerifier

class SettlementWitnessVerifier(RemoteHTTPVerifier):
    """Named adapter over RemoteHTTPVerifier; adds no behaviour of its own."""

    def verify(self, *, task_id, spec, output, context=None):
        """Forward the call unchanged to the parent implementation."""
        result = super().verify(
            task_id=task_id,
            spec=spec,
            output=output,
            context=context,
        )
        return result
Loading