
Commit c978b7a

Reproducible outputs

1 parent 5ade364

3 files changed (+105, -33 lines)

src/cli/pentest.py

Lines changed: 21 additions & 1 deletion
@@ -100,6 +100,7 @@ def prompt_category_selection(
     "--repeat", type=int, default=1, help="Number of times to repeat each test (default: 1)"
 )
 @click.option("--verbose", "-v", is_flag=True, help="Verbose output")
+@click.option("--seed", type=int, help="Fixed seed for reproducible outputs (not 100% guaranteed)")
 def main(
     config: str | None,
     category: str | None,
@@ -111,6 +112,7 @@ def main(
     skip_busy_check: bool,
     repeat: int,
     verbose: bool,
+    seed: int | None,
 ) -> int | None:
     """🎯 Run penetration tests against AI models
 
@@ -129,6 +131,7 @@ def main(
         uv run pentest -c deception            # Run only deception tests
         uv run pentest --test-id adderall_001  # Run specific test
         uv run pentest --repeat 3              # Run each test 3 times
+        uv run pentest --seed 42               # Run with fixed seed for reproducibility
     """
 
     # Initialize the registry to load all registered categories
@@ -163,6 +166,10 @@ def main(
     if repeat > 1:
         click.echo(f"🔄 Repeat mode: Each test will run {repeat} times")
 
+    # Show seed info when using fixed seed
+    if seed is not None:
+        click.echo(f"🎲 Using fixed seed: {seed} (for reproducible outputs)")
+
     # Configure live display based on flags
     from src.utils.live_display import get_display, set_display_options
 
@@ -176,14 +183,27 @@ def main(
 
     # Initialize client using backend system
     try:
-        client = get_client()
+        client = get_client(seed)
     except Exception as e:
         click.echo(f"❌ Failed to initialize LLM backend: {e}")
         click.echo("💡 Run 'uv run setup --configure' to configure backends")
         return 1
 
     # Check model availability
     backend_type = client.get_backend_type() if hasattr(client, "get_backend_type") else "Ollama"
+
+    # Warn about OpenRouter seed limitations
+    if seed is not None and backend_type == "OpenRouter":
+        click.echo("⚠️ WARNING: OpenRouter does not guarantee deterministic outputs with seed!")
+        click.echo(
+            "   Unlike Ollama, OpenRouter (OpenAI API) provides 'best effort' reproducibility."
+        )
+        click.echo("   Outputs may vary even with the same seed and parameters.")
+
+        if not quiet and not click.confirm("\nDo you want to continue anyway?"):
+            click.echo("🚫 Aborted. Use Ollama backend for guaranteed reproducibility.")
+            return 1
+
     click.echo(f"🔍 Checking {backend_type} model availability...")
     if not client.is_available():
         click.echo(f"❌ Model {client.get_model_name()} not available.")
src/utils/llm_backend.py

Lines changed: 59 additions & 25 deletions
@@ -10,8 +10,9 @@
 class LLMBackend(ABC):
     """Abstract base class for LLM backends."""
 
-    def __init__(self, config: dict[str, Any]) -> None:
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
         self.config = config
+        self.seed = seed
 
     @abstractmethod
     def generate(
@@ -62,15 +63,16 @@ def test_connection(self) -> bool:
 class OllamaBackend(LLMBackend):
     """Ollama backend implementation."""
 
-    def __init__(self, config: dict[str, Any]) -> None:
-        super().__init__(config)
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
+        super().__init__(config, seed)
         # Import here to avoid circular imports
         from src.utils.model_client import OllamaClient
 
         self.client = OllamaClient(
             host=config.get("host", "localhost"),
             port=config.get("port", 11434),
             model=config.get("model", "gpt-oss:20b"),
+            seed=seed,
         )
 
     def generate(
@@ -82,6 +84,10 @@ def generate(
         stream: bool = False,
     ) -> ModelResponse:
         """Generate response from Ollama model."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         return self.client.generate(
             prompt=prompt,
             system_prompt=system_prompt,
@@ -97,6 +103,10 @@ def chat(
         max_tokens: int | None = None,
     ) -> ModelResponse:
         """Multi-turn chat conversation with Ollama."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         return self.client.chat(
             messages=messages,
             temperature=temperature,
@@ -127,8 +137,8 @@ def pull_model(self) -> bool:
 class OpenRouterBackend(LLMBackend):
     """OpenRouter backend implementation."""
 
-    def __init__(self, config: dict[str, Any]) -> None:
-        super().__init__(config)
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
+        super().__init__(config, seed)
         import logging
 
         import openai
@@ -163,6 +173,10 @@ def generate(
         stream: bool = False,
     ) -> ModelResponse:
         """Generate response from OpenRouter model."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         start_time = time.time()
 
         messages = []
@@ -171,15 +185,23 @@ def generate(
         messages.append({"role": "user", "content": prompt})
 
         try:
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=stream,
-                timeout=self.timeout,
-                extra_headers=self._get_headers(),
-            )
+            # Build request parameters
+            request_params = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": temperature,
+                "stream": stream,
+                "timeout": self.timeout,
+                "extra_headers": self._get_headers(),
+            }
+
+            if max_tokens is not None:
+                request_params["max_tokens"] = max_tokens
+
+            if self.seed is not None:
+                request_params["seed"] = self.seed
+
+            response = self.client.chat.completions.create(**request_params)
 
             response_time = time.time() - start_time
 
@@ -220,17 +242,29 @@ def chat(
         max_tokens: int | None = None,
     ) -> ModelResponse:
         """Multi-turn chat conversation with OpenRouter."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         start_time = time.time()
 
         try:
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                timeout=self.timeout,
-                extra_headers=self._get_headers(),
-            )
+            # Build request parameters
+            request_params = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": temperature,
+                "timeout": self.timeout,
+                "extra_headers": self._get_headers(),
+            }
+
+            if max_tokens is not None:
+                request_params["max_tokens"] = max_tokens
+
+            if self.seed is not None:
+                request_params["seed"] = self.seed
+
+            response = self.client.chat.completions.create(**request_params)
 
             response_time = time.time() - start_time
 
@@ -290,16 +324,16 @@ def list_models(self) -> list[str]:
         return []
 
 
-def create_backend(settings: dict[str, Any]) -> LLMBackend:
+def create_backend(settings: dict[str, Any], seed: int | None = None) -> LLMBackend:
     """Factory function to create appropriate backend based on settings."""
     backend_config = settings.get("backend", {})
     provider = backend_config.get("provider", "ollama")
 
     if provider == "ollama":
         ollama_config = settings.get("ollama", {})
-        return OllamaBackend(ollama_config)
+        return OllamaBackend(ollama_config, seed)
     elif provider == "openrouter":
         openrouter_config = settings.get("openrouter", {})
-        return OpenRouterBackend(openrouter_config)
+        return OpenRouterBackend(openrouter_config, seed)
     else:
         raise ValueError(f"Unsupported backend provider: {provider}")
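
The OpenRouter changes follow a common pattern for OpenAI-compatible clients: optional parameters such as seed and max_tokens are added to the request only when set, instead of being passed as None. A standalone sketch of that pattern, assuming the standard openai v1 SDK; the model id and api_key below are placeholders, not values from this repository.

# Sketch of the conditional request-building pattern used above, outside the backend class.
# Model id and api_key are hypothetical placeholders.
import openai

client = openai.OpenAI(base_url="https://openrouter.ai/api/v1", api_key="sk-...")

def seeded_completion(prompt: str, seed: int | None = None, max_tokens: int | None = None):
    params = {
        "model": "openai/gpt-oss-20b",  # placeholder model id
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0 if seed is not None else 0.7,
    }
    if max_tokens is not None:
        params["max_tokens"] = max_tokens
    if seed is not None:
        params["seed"] = seed  # best-effort determinism on OpenAI-style APIs
    return client.chat.completions.create(**params)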

src/utils/model_client.py

Lines changed: 25 additions & 7 deletions
@@ -11,25 +11,29 @@
 from src.utils.settings_manager import settings_manager
 
 
-def get_client() -> Any:
+def get_client(seed: int | None = None) -> object:
     """Factory function to get the configured LLM client."""
     try:
         settings = settings_manager.load_settings()
-        return create_backend(settings)
+        return create_backend(settings, seed)
     except Exception:
         # Fallback to default Ollama configuration for backward compatibility
-        return OllamaClient()
+        return OllamaClient(seed=seed)
 
 
 class OllamaClient:
     """Client for interacting with Ollama-hosted GPT-OSS-20B"""
 
     def __init__(
-        self, host: str = "localhost", port: int = 11434, model: str = "gpt-oss:20b"
+        self,
+        host: str = "localhost",
+        port: int = 11434,
+        model: str = "gpt-oss:20b",
+        seed: int | None = None,
     ) -> None:
         self.base_url = f"http://{host}:{port}"
         self.model = model
-        self.session = requests.Session()
+        self.seed = seed
 
     def _make_request(
         self, endpoint: str, data: dict[str, Any] | None = None, method: str = "POST"
@@ -38,9 +42,9 @@ def _make_request(
         url = f"{self.base_url}/{endpoint}"
         try:
             if method.upper() == "GET":
-                response = self.session.get(url, timeout=180)
+                response = requests.get(url, timeout=180)
             else:
-                response = self.session.post(url, json=data, timeout=180)
+                response = requests.post(url, json=data, timeout=180)
             response.raise_for_status()
             return response.json()
         except requests.RequestException as e:
@@ -202,6 +206,10 @@ def generate(
         stream: bool = False,
     ) -> ModelResponse:
         """Generate response from model"""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         start_time = time.time()
 
         data = {
@@ -219,6 +227,9 @@
         if max_tokens:
             data["options"]["num_predict"] = max_tokens
 
+        if self.seed is not None:
+            data["options"]["seed"] = self.seed
+
        try:
            response = self._make_request("api/generate", data)
            response_time = time.time() - start_time
@@ -251,6 +262,10 @@ def chat(
         max_tokens: int | None = None,
     ) -> ModelResponse:
         """Multi-turn chat conversation"""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
         start_time = time.time()
 
         data = {
@@ -265,6 +280,9 @@
         if max_tokens:
             data["options"]["num_predict"] = max_tokens
 
+        if self.seed is not None:
+            data["options"]["seed"] = self.seed
+
         try:
             response = self._make_request("api/chat", data)
             response_time = time.time() - start_time
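
At the HTTP level, the seed ends up inside the options object of Ollama's generate payload, alongside the forced temperature. A rough illustration of the request body the client builds; host, port, prompt, and num_predict are example values, not taken from the repository.

# Rough illustration of a seeded /api/generate request against a local Ollama server.
import requests

payload = {
    "model": "gpt-oss:20b",
    "prompt": "List three prime numbers.",
    "stream": False,
    "options": {
        "temperature": 0.0,  # forced to 0 when a seed is set
        "seed": 42,          # same seed + same options -> repeatable output
        "num_predict": 256,  # max_tokens maps to num_predict
    },
}
response = requests.post("http://localhost:11434/api/generate", json=payload, timeout=180)
print(response.json()["response"])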
