1 | | -import argparse, json, logging, logging.handlers, re, sys, random, multiprocessing |
2 | | -from typing import Dict, Any, List |
3 | | -from z3 import Solver, Bool, Not, sat |
4 | 1 | import gradio as gr |
5 | | -import yaml |
6 | | - |
7 | | -from src.ckmm import ( |
8 | | - FuelLedger, ReplayStability, CouncilRunner, |
9 | | - ConstraintEvaluator, SubstrateInfo, evaluate_ckmm_pass |
10 | | -) |
11 | | -from src.infra import gpu_status_safe |
12 | | -from src.reward_cvr import CVRReward, DuPOSelfVerifier |
13 | | -from src.aggregator_policy import AggregatorPolicy, CompressionGovernor |
14 | | -from src.adaptive_repair import AdaptiveRepair |
15 | | - |
16 | | -def _setup_logging(level_name: str = "ERROR"): |
17 | | - root = logging.getLogger() |
18 | | - root.handlers.clear() |
19 | | - root.setLevel(getattr(logging, level_name.upper(), logging.ERROR)) |
20 | | - fh = logging.handlers.RotatingFileHandler('errors.log', maxBytes=65536, backupCount=3) |
21 | | - fh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')) |
22 | | - ch = logging.StreamHandler(sys.stdout) |
23 | | - ch.setFormatter(logging.Formatter('[%(levelname)s] %(message)s')) |
24 | | - root.addHandler(fh) |
25 | | - root.addHandler(ch) |
26 | | - return root |
27 | | - |
28 | | -def _effective_iterations(req: int) -> int: |
29 | | - try: |
30 | | - cores = max(1, multiprocessing.cpu_count()) |
31 | | - except Exception: |
32 | | - cores = 1 |
33 | | - if req < 0: |
34 | | - return min(max(100_000, cores * 1_000_000), 10_000_000) |
35 | | - return min(req, 10_000_000) |
36 | | - |
37 | | -def _toy_model(prompt: str) -> str: |
38 | | - text = (prompt or "").lower() |
39 | | - if "exception" in text: |
40 | | - return "no" |
41 | | - if "myth" in text: |
42 | | - return "yes" |
43 | | - return "yes" |
44 | | - |
45 | | -def _ckmm_summary(prompt: str, iterations: int, thresholds: Dict[str, Any], verbose: bool=False) -> Dict[str, Any]: |
46 | | - # Energy |
47 | | - with FuelLedger() as fuel: |
48 | | - acc = 0 |
49 | | - for i in range(max(0, int(iterations))): |
50 | | - if i % 1_000_000 == 0 and i > 0: |
51 | | - fuel.tick_attention(1) |
52 | | - acc += i % 7 |
53 | | - fuel_d = fuel.to_dict() |
54 | | - |
55 | | - # Temporal stability |
56 | | - temporal = ReplayStability(repeats=5).run(_toy_model, prompt) |
57 | | - |
58 | | - # Council |
59 | | - council = CouncilRunner([_toy_model, _toy_model, _toy_model]).run(prompt) |
60 | | - |
61 | | - # Ethics |
62 | | - ethics = ConstraintEvaluator("config/constraints.sample.yaml").evaluate(_toy_model(prompt), "") |
63 | | - |
64 | | - # Embodiment |
65 | | - substrate = SubstrateInfo().capture() |
66 | | - |
67 | | - # Decision |
68 | | - passes = evaluate_ckmm_pass(fuel_d, temporal, council, ethics, substrate, thresholds) |
69 | | - |
70 | | - # Pretty numbers |
71 | | - wall_s = fuel_d.get("wall_s", 0.0) |
72 | | - kib = fuel_d.get("kib", 0.0) |
73 | | - mib = kib / 1024.0 if kib else 0.0 |
74 | | - stab = temporal.get("stability", 0.0) |
75 | | - agree = council.get("agreement", 0.0) |
76 | | - penalty = ethics.get("penalty", 0) |
77 | | - |
78 | | - summary = { |
79 | | - "pass": passes.get("all_ok", False), |
80 | | - "fuel_wall_s": round(wall_s, 6), |
81 | | - "fuel_mib": round(mib, 4), |
82 | | - "stability": round(stab, 4), |
83 | | - "council": round(agree, 4), |
84 | | - "penalty": int(penalty), |
85 | | - "weighted_council_ok": passes.get("weighted_council_ok", False) |
| 2 | +from z3 import Solver, Bool, sat |
| 3 | + |
| 4 | + |
| 5 | +def toy_penguin_sat(prompt: str) -> str: |
| 6 | + """ |
| 7 | + Tiny symbolic check. |
| 8 | + If the prompt contains the phrase 'penguin on ice' return sat. |
| 9 | + Else return unsat. |
| 10 | + """ |
| 11 | + s = Solver() |
| 12 | + x = Bool("penguin_on_ice") |
| 13 | + want = "penguin on ice" in (prompt or "").lower() |
| 14 | +    s.add(x == want, x)  # x must also hold, so this is unsat when the phrase is absent
| 15 | + result = "sat" if s.check() == sat else "unsat" |
| 16 | + return result |
| 17 | + |
| 18 | + |
| 19 | +def ckmm_l_metrics(prompt: str) -> dict: |
| 20 | + """ |
| 21 | +    Public-safe stub numbers. These do not reveal any private method.
| 22 | + They simply map string features to stable floats for a friendly readout. |
| 23 | + """ |
| 24 | + text = (prompt or "").strip() |
| 25 | + base = len(text) / 100.0 |
| 26 | + base = max(0.0, min(base, 1.0)) |
| 27 | + return { |
| 28 | + "Fuel": round(0.6 + 0.4 * base, 2), |
| 29 | + "Temporal": round(0.5 + 0.3 * base, 2), |
| 30 | + "Relational": round(0.4 + 0.35 * base, 2), |
| 31 | + "Ethics": round(0.7 + 0.25 * base, 2), |
| 32 | + "Embodiment": round(0.3 + 0.5 * base, 2), |
86 | 33 | } |
87 | | - if verbose: |
88 | | - summary.update({ |
89 | | - "fuel": fuel_d, |
90 | | - "temporal": temporal, |
91 | | - "council_detail": council, |
92 | | - "ethics_detail": ethics, |
93 | | - "substrate": substrate, |
94 | | - "thresholds": thresholds, |
95 | | - "gpu": gpu_status_safe() |
96 | | - }) |
97 | | - return summary |
98 | | - |
99 | | -def _format_ckmm_line(s: Dict[str, Any]) -> str: |
100 | | - state = "PASS" if s.get("pass") else "FAIL" |
101 | | - wc = "weighted ok" if s.get("weighted_council_ok") else "weighted no" |
102 | | - return f"ckmm {state} | fuel {s['fuel_wall_s']} s and {s['fuel_mib']} MiB | stab {s['stability']} | council {s['council']} [{wc}] | penalty {s['penalty']}" |
103 | | - |
104 | | -def _generate_candidates(prompt: str, k: int, seed: int) -> List[str]: |
105 | | - random.seed(seed) |
106 | | - base = _toy_model(prompt) |
107 | | - cands = [base for _ in range(k)] |
108 | | - # introduce slight diversity for 'myth' |
109 | | - if "myth" in (prompt or "").lower(): |
110 | | - cands = ["yes" if random.random() > 0.2 else "no" for _ in range(k)] |
111 | | - return cands |
112 | | - |
113 | | -def make_checker(args, thresholds: Dict[str, Any]): |
114 | | - iterations = _effective_iterations(args.ckmm_iterations) |
115 | | - agg = AggregatorPolicy() |
116 | | - gov = CompressionGovernor() |
117 | | - cvr = CVRReward.load("configs/cvr_dopo.yaml") |
118 | | - dupo = DuPOSelfVerifier() |
119 | | - adaptive = AdaptiveRepair(eta=args.adaptive_eta) if args.adaptive else None |
120 | | - |
121 | | - def check_penguin(query: str) -> str: |
122 | | - try: |
123 | | - # Z3 baseline facts |
124 | | - s = Solver() |
125 | | - Penguin, Fly = Bool('Penguin'), Bool('Fly') |
126 | | - s.add(Penguin) |
127 | | - s.add(Not(Fly)) |
128 | | - |
129 | | - text = (query or "").lower() |
130 | | - if "fly" in text and re.search(r"\b(must|always|definitely)\b", text): |
131 | | - s2 = Solver() |
132 | | - s2.add(Penguin) |
133 | | - s2.add(Fly) |
134 | | - res = s2.check() |
135 | | - ckmm = _ckmm_summary(query, iterations, thresholds, verbose=bool(args.json)) |
136 | | - if adaptive: |
137 | | - adaptive.step(ckmm) |
138 | | - ckmm["adaptive_weights"] = adaptive.weights() |
139 | | - if res != sat: |
140 | | - out = "unsat: conflict detected. Score=0" |
141 | | - else: |
142 | | - out = f"unexpected: {res}" |
143 | | - return json.dumps(ckmm, sort_keys=True, indent=2) if args.json else (out + " | " + _format_ckmm_line(ckmm) + (f" | adaptive {ckmm['adaptive_weights']}" if 'adaptive_weights' in ckmm else "")) |
144 | | - |
145 | | - # Sampling and aggregation |
146 | | - k = max(3, int(args.samples)) |
147 | | - cands = _generate_candidates(query, k, args.seed) |
148 | | - rewards = [cvr.score(query, a) for a in cands] |
149 | | - if args.dupo: |
150 | | - _ = dupo.dual_check(query, cands) |
151 | | - choice = agg.aggregate(cands, rewards, {}) |
152 | | - concise = gov.select(cands) |
153 | | - |
154 | | - # Distortion score toy calc |
155 | | - modal = 0.7 if "myth" in text else (0.6 if "exception" in text else 1.0) |
156 | | - diversity = 2.0 |
157 | | - divergence = 1.0 - (0.7 if "myth" in text else 1.0) |
158 | | - score = (modal * diversity) / (1.0 + divergence) if (1.0 + divergence) > 0 else 0.0 |
159 | | - |
160 | | - ckmm = _ckmm_summary(query, iterations, thresholds, verbose=bool(args.json)) |
161 | | - if adaptive: |
162 | | - adaptive.step(ckmm) |
163 | | - ckmm["adaptive_weights"] = adaptive.weights() |
164 | | - |
165 | | - if args.json: |
166 | | - ckmm.update({"distortion_score": round(score, 3), "choice": choice, "concise": concise}) |
167 | | - return json.dumps(ckmm, sort_keys=True, indent=2) |
168 | | - |
169 | | - line = f"sat: penguins do not fly. Score={score:.3f} | " + _format_ckmm_line(ckmm) |
170 | | - if 'adaptive_weights' in ckmm: |
171 | | - line += f" | adaptive {ckmm['adaptive_weights']}" |
172 | | - return line |
173 | | - |
174 | | - except Exception: |
175 | | - logging.getLogger().error("check_penguin failed", exc_info=True) |
176 | | - return "error: see errors.log" |
177 | | - return check_penguin |
178 | | - |
179 | | -def _parse_thresholds(th_json: str) -> Dict[str, Any]: |
180 | | - if not th_json: |
181 | | - return {} |
182 | | - try: |
183 | | - import jsonschema, json as _json |
184 | | - data = _json.loads(th_json) |
185 | | - if not isinstance(data, dict): |
186 | | - raise ValueError("thresholds must be a JSON object") |
187 | | - jsonschema.validate(instance=data, schema={"type":"object"}) |
188 | | - return data |
189 | | - except Exception: |
190 | | - logging.getLogger().warning("jsonschema not available or invalid thresholds, using parsed JSON if possible") |
191 | | - try: |
192 | | - import json as _json |
193 | | - return _json.loads(th_json) |
194 | | - except Exception: |
195 | | - return {} |
196 | | - |
197 | | -def main(): |
198 | | - ap = argparse.ArgumentParser() |
199 | | - ap.add_argument("--share", action="store_true", help="Public share (gradio.live)") |
200 | | - ap.add_argument("--server_name", type=str, default=None, help="Gradio server name") |
201 | | - ap.add_argument("--server_port", type=int, default=None, help="Gradio server port") |
202 | | - ap.add_argument("--log-level", type=str, default="ERROR", |
203 | | - choices=["DEBUG", "INFO", "WARNING", "ERROR"], |
204 | | - help="Logging level for errors.log and console") |
205 | | - |
206 | | - # CKMM controls |
207 | | - ap.add_argument("--ckmm-iterations", type=int, default=5_000_000, help="Workload iterations; negative autoscale") |
208 | | - ap.add_argument("--ckmm-thresholds", type=str, default="", help="JSON to override parts of config/ckmm_config.yaml") |
209 | | - |
210 | | - # Output modes |
211 | | - ap.add_argument("--summary", action="store_true", default=True, help="Human readable summary output (default)") |
212 | | - ap.add_argument("--json", action="store_true", help="Print verbose JSON instead of summary") |
213 | | - |
214 | | - # Novelty features |
215 | | - ap.add_argument("--dupo", action="store_true", help="Enable DuPO paired checks") |
216 | | - ap.add_argument("--samples", type=int, default=5, help="Number of samples for aggregation") |
217 | | - ap.add_argument("--seed", type=int, default=7, help="Random seed for sampling") |
218 | | - ap.add_argument("--real-data", action="store_true", help="Use tiny real data stubs") |
219 | | - |
220 | | - # Adaptive repair |
221 | | - ap.add_argument("--adaptive", action="store_true", help="Enable adaptive repair controller driven by CKMM signals") |
222 | | - ap.add_argument("--adaptive-eta", type=float, default=0.1, help="Learning rate for adaptive repair") |
223 | | - |
224 | | - args = ap.parse_args() |
225 | | - _setup_logging(args.log_level) |
226 | | - |
227 | | - # Validate args |
228 | | - try: |
229 | | - if args.server_port is not None and args.server_port <= 0: |
230 | | - raise ValueError("Port must be a positive integer") |
231 | | - if args.server_name is not None and not isinstance(args.server_name, str): |
232 | | - raise ValueError("Server name must be a string") |
233 | | - except ValueError as e: |
234 | | - logging.getLogger().error(e, exc_info=True); sys.exit(1) |
235 | | - |
236 | | - # Load thresholds |
237 | | - with open("config/ckmm_config.yaml", "r", encoding="utf-8") as f: |
238 | | - base_th = yaml.safe_load(f) or {} |
239 | | - override = _parse_thresholds(args.ckmm_thresholds) |
240 | | - for k, v in override.items(): |
241 | | - if isinstance(v, dict) and isinstance(base_th.get(k), dict): |
242 | | - base_th[k].update(v) |
243 | | - else: |
244 | | - base_th[k] = v |
245 | 34 |
|
246 | | - checker = make_checker(args, base_th) |
247 | 35 |
|
248 | | - title = f"Penguin Distortion Tester — fuel≤{base_th.get('fuel', {}).get('max_time_s', 'n/a')}s | GPU {gpu_status_safe().get('gpus',0)}" |
249 | | - iface = gr.Interface(fn=checker, inputs="text", outputs="text", title=title) |
250 | | - iface.launch(server_name=args.server_name, server_port=args.server_port, share=args.share) |
| 36 | +def audit(prompt: str): |
| 37 | + result = toy_penguin_sat(prompt) |
| 38 | + m = ckmm_l_metrics(prompt) |
| 39 | + lines = [ |
| 40 | + f"Result: {result}", |
| 41 | + f"Fuel={m['Fuel']:.2f} Temporal={m['Temporal']:.2f} " |
| 42 | + f"Relational={m['Relational']:.2f} Ethics={m['Ethics']:.2f} " |
| 43 | + f"Embodiment={m['Embodiment']:.2f}" |
| 44 | + ] |
| 45 | + return "\n".join(lines) |
| 46 | + |
| 47 | + |
| 48 | +with gr.Blocks(title="Penguin Distortion Tester") as demo: |
| 49 | + gr.Markdown("# Penguin Distortion Tester") |
| 50 | + gr.Markdown( |
| 51 | + "Enter a short prompt such as **penguin on ice** and run the audit. " |
| 52 | +        "The numbers are public-safe placeholders."
| 53 | + ) |
| 54 | + inp = gr.Textbox(label="Prompt", value="penguin on ice") |
| 55 | + out = gr.Textbox(label="Audit Output", lines=4) |
| 56 | + btn = gr.Button("Run audit") |
| 57 | + btn.click(fn=audit, inputs=inp, outputs=out) |
251 | 58 |
|
252 | 59 | if __name__ == "__main__": |
253 | | - main() |
| 60 | + demo.launch(server_name="0.0.0.0", server_port=7860) |
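
A minimal sketch of how the new helpers could be exercised outside the Gradio UI. The `app` module name is an assumption here; import from wherever this file actually lives.

    # Illustrative sketch only: assumes this file is importable as `app`.
    from app import toy_penguin_sat, ckmm_l_metrics

    # The z3 check reports sat only when the key phrase appears in the prompt.
    print(toy_penguin_sat("a penguin on ice"))     # sat
    print(toy_penguin_sat("a parrot on a perch"))  # unsat

    # The stub metrics are deterministic floats derived from prompt length.
    print(ckmm_l_metrics("penguin on ice"))

Under the same assumption, `python app.py` serves the demo on port 7860 via the `demo.launch` call above.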