This repository was archived by the owner on Oct 21, 2025. It is now read-only.
Merged
Changes from 3 commits
30 changes: 27 additions & 3 deletions src/cli/pentest.py
@@ -100,6 +100,7 @@ def prompt_category_selection(
"--repeat", type=int, default=1, help="Number of times to repeat each test (default: 1)"
)
@click.option("--verbose", "-v", is_flag=True, help="Verbose output")
@click.option("--seed", type=int, help="Fixed seed for reproducible outputs (not 100% guaranteed)")
def main(
config: str | None,
category: str | None,
@@ -111,6 +112,7 @@ def main(
skip_busy_check: bool,
repeat: int,
verbose: bool,
seed: int | None,
) -> int | None:
"""🎯 Run penetration tests against AI models

@@ -129,6 +131,7 @@
uv run pentest -c deception # Run only deception tests
uv run pentest --test-id adderall_001 # Run specific test
uv run pentest --repeat 3 # Run each test 3 times
uv run pentest --seed 42 # Run with fixed seed for reproducibility
"""

# Initialize the registry to load all registered categories
@@ -163,6 +166,10 @@ def main(
if repeat > 1:
click.echo(f"🔄 Repeat mode: Each test will run {repeat} times")

# Show seed info when using fixed seed
if seed is not None:
click.echo(f"🎲 Using fixed seed: {seed} (for reproducible outputs)")

# Configure live display based on flags
from src.utils.live_display import get_display, set_display_options

@@ -176,14 +183,27 @@

# Initialize client using backend system
try:
client = get_client()
client = get_client(seed)
except Exception as e:
click.echo(f"❌ Failed to initialize LLM backend: {e}")
click.echo("💡 Run 'uv run setup --configure' to configure backends")
return 1

# Check model availability
backend_type = client.get_backend_type() if hasattr(client, "get_backend_type") else "Ollama"

# Warn about OpenRouter seed limitations
if seed is not None and backend_type == "OpenRouter":
click.echo("⚠️ WARNING: OpenRouter does not guarantee deterministic outputs with seed!")
click.echo(
" Unlike Ollama, OpenRouter (OpenAI API) provides 'best effort' reproducibility."
)
click.echo(" Outputs may vary even with the same seed and parameters.")

if not quiet and not click.confirm("\nDo you want to continue anyway?"):
click.echo("🚫 Aborted. Use Ollama backend for guaranteed reproducibility.")
return 1

click.echo(f"🔍 Checking {backend_type} model availability...")
if not client.is_available():
click.echo(f"❌ Model {client.get_model_name()} not available.")
@@ -193,10 +213,14 @@ def main(
click.echo(f"✅ {backend_type} model {client.get_model_name()} ready")

# Check if backend is busy before starting tests (Ollama only)
if not skip_busy_check and hasattr(client, "check_status"):
if (
not skip_busy_check
and hasattr(client, "check_status")
and callable(getattr(client, "check_status", None))
):
click.echo(f"🔍 Checking {backend_type} status...")
try:
status = client.check_status()
status = client.check_status() # type: ignore

if status.is_busy:
click.echo(f"⚠️ WARNING: {backend_type} appears busy!")
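A quick sketch of how the new --seed flag flows through this PR, for reviewers. It uses only names visible in this diff (get_client, get_backend_type, generate); parameters hidden by the collapsed hunks are assumed to keep their existing defaults.

# Call path added by this PR:
#   uv run pentest --seed 42
#     -> main(..., seed=42)                      # src/cli/pentest.py
#     -> get_client(42)                          # src/utils/model_client.py
#     -> create_backend(settings, 42)            # src/utils/llm_backend.py
#     -> OllamaBackend(config, 42) / OpenRouterBackend(config, 42)
from src.utils.model_client import get_client

client = get_client(seed=42)
backend_type = client.get_backend_type() if hasattr(client, "get_backend_type") else "Ollama"
response = client.generate(prompt="ping")  # with a seed set, temperature is forced to 0.0
# Ollama honors the seed deterministically; OpenRouter treats it as best effort,
# which is why the CLI above asks for confirmation on that backend.
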
84 changes: 59 additions & 25 deletions src/utils/llm_backend.py
@@ -10,8 +10,9 @@
class LLMBackend(ABC):
"""Abstract base class for LLM backends."""

def __init__(self, config: dict[str, Any]) -> None:
def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
self.config = config
self.seed = seed

@abstractmethod
def generate(
@@ -62,15 +63,16 @@ def test_connection(self) -> bool:
class OllamaBackend(LLMBackend):
"""Ollama backend implementation."""

def __init__(self, config: dict[str, Any]) -> None:
super().__init__(config)
def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
super().__init__(config, seed)
# Import here to avoid circular imports
from src.utils.model_client import OllamaClient

self.client = OllamaClient(
host=config.get("host", "localhost"),
port=config.get("port", 11434),
model=config.get("model", "gpt-oss:20b"),
seed=seed,
)

def generate(
@@ -82,6 +84,10 @@ def generate(
stream: bool = False,
) -> ModelResponse:
"""Generate response from Ollama model."""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0

return self.client.generate(
prompt=prompt,
system_prompt=system_prompt,
@@ -97,6 +103,10 @@ def chat(
max_tokens: int | None = None,
) -> ModelResponse:
"""Multi-turn chat conversation with Ollama."""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0

return self.client.chat(
messages=messages,
temperature=temperature,
@@ -127,8 +137,8 @@ def pull_model(self) -> bool:
class OpenRouterBackend(LLMBackend):
"""OpenRouter backend implementation."""

def __init__(self, config: dict[str, Any]) -> None:
super().__init__(config)
def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
super().__init__(config, seed)
import logging

import openai
@@ -163,6 +173,10 @@ def generate(
stream: bool = False,
) -> ModelResponse:
"""Generate response from OpenRouter model."""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0
[Contributor review comment on the lines above: "As above."]

start_time = time.time()

messages = []
@@ -171,15 +185,23 @@
messages.append({"role": "user", "content": prompt})

try:
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
stream=stream,
timeout=self.timeout,
extra_headers=self._get_headers(),
)
# Build request parameters
request_params = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"stream": stream,
"timeout": self.timeout,
"extra_headers": self._get_headers(),
}

if max_tokens is not None:
request_params["max_tokens"] = max_tokens

if self.seed is not None:
request_params["seed"] = self.seed

response = self.client.chat.completions.create(**request_params)

response_time = time.time() - start_time

@@ -220,17 +242,29 @@ def chat(
max_tokens: int | None = None,
) -> ModelResponse:
"""Multi-turn chat conversation with OpenRouter."""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0

start_time = time.time()

try:
response = self.client.chat.completions.create(
model=self.model,
messages=messages,
temperature=temperature,
max_tokens=max_tokens,
timeout=self.timeout,
extra_headers=self._get_headers(),
)
# Build request parameters
request_params = {
"model": self.model,
"messages": messages,
"temperature": temperature,
"timeout": self.timeout,
"extra_headers": self._get_headers(),
}

if max_tokens is not None:
request_params["max_tokens"] = max_tokens

if self.seed is not None:
request_params["seed"] = self.seed

response = self.client.chat.completions.create(**request_params)

response_time = time.time() - start_time

@@ -290,16 +324,16 @@ def list_models(self) -> list[str]:
return []


def create_backend(settings: dict[str, Any]) -> LLMBackend:
def create_backend(settings: dict[str, Any], seed: int | None = None) -> LLMBackend:
"""Factory function to create appropriate backend based on settings."""
backend_config = settings.get("backend", {})
provider = backend_config.get("provider", "ollama")

if provider == "ollama":
ollama_config = settings.get("ollama", {})
return OllamaBackend(ollama_config)
return OllamaBackend(ollama_config, seed)
elif provider == "openrouter":
openrouter_config = settings.get("openrouter", {})
return OpenRouterBackend(openrouter_config)
return OpenRouterBackend(openrouter_config, seed)
else:
raise ValueError(f"Unsupported backend provider: {provider}")
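
A hedged usage sketch of the updated factory, showing what the seed changes at request time. The settings keys mirror what create_backend reads above; the literal host/port/model values are illustrative assumptions, and the optional generate() parameters elided by the collapsed hunks are assumed to keep their defaults.

from src.utils.llm_backend import create_backend

settings = {
    "backend": {"provider": "ollama"},  # or "openrouter"
    "ollama": {"host": "localhost", "port": 11434, "model": "gpt-oss:20b"},
}
backend = create_backend(settings, seed=42)

# With a seed set, generate() and chat() override the caller's temperature to 0.0
# and forward the seed to the provider (Ollama: options["seed"], OpenRouter: seed=...).
r1 = backend.generate(prompt="List three colors.", temperature=0.9)  # 0.9 is ignored because seed is set
r2 = backend.generate(prompt="List three colors.", temperature=0.9)
# On Ollama the two responses should be identical; on OpenRouter this is best effort only.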
52 changes: 43 additions & 9 deletions src/utils/model_client.py
@@ -2,34 +2,42 @@

import subprocess
import time
from typing import Any
from typing import TYPE_CHECKING, Any, Union

import requests
from requests.exceptions import Timeout
from src.models import ModelResponse, OllamaStatus
from src.utils.llm_backend import create_backend
from src.utils.settings_manager import settings_manager

if TYPE_CHECKING:
from src.utils.llm_backend import LLMBackend

def get_client() -> Any:

def get_client(seed: int | None = None) -> Union["LLMBackend", "OllamaClient"]:
"""Factory function to get the configured LLM client."""
from src.utils.llm_backend import create_backend

try:
settings = settings_manager.load_settings()
return create_backend(settings)
return create_backend(settings, seed)
except Exception:
# Fallback to default Ollama configuration for backward compatibility
return OllamaClient()
return OllamaClient(seed=seed)


class OllamaClient:
"""Client for interacting with Ollama-hosted GPT-OSS-20B"""

def __init__(
self, host: str = "localhost", port: int = 11434, model: str = "gpt-oss:20b"
self,
host: str = "localhost",
port: int = 11434,
model: str = "gpt-oss:20b",
seed: int | None = None,
) -> None:
self.base_url = f"http://{host}:{port}"
self.model = model
self.session = requests.Session()
self.seed = seed

def _make_request(
self, endpoint: str, data: dict[str, Any] | None = None, method: str = "POST"
@@ -38,9 +46,9 @@ def _make_request(
url = f"{self.base_url}/{endpoint}"
try:
if method.upper() == "GET":
response = self.session.get(url, timeout=180)
response = requests.get(url, timeout=180)
else:
response = self.session.post(url, json=data, timeout=180)
response = requests.post(url, json=data, timeout=180)
response.raise_for_status()
return response.json()
except requests.RequestException as e:
@@ -202,6 +210,10 @@ def generate(
stream: bool = False,
) -> ModelResponse:
"""Generate response from model"""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0

start_time = time.time()

data = {
@@ -219,6 +231,9 @@
if max_tokens:
data["options"]["num_predict"] = max_tokens

if self.seed is not None:
data["options"]["seed"] = self.seed

try:
response = self._make_request("api/generate", data)
response_time = time.time() - start_time
@@ -251,6 +266,10 @@ def chat(
max_tokens: int | None = None,
) -> ModelResponse:
"""Multi-turn chat conversation"""
# For reproducibility, use temperature=0 when seed is set
if self.seed is not None:
temperature = 0.0
[Contributor review comment on the lines above: "again"]

start_time = time.time()

data = {
@@ -265,6 +284,9 @@
if max_tokens:
data["options"]["num_predict"] = max_tokens

if self.seed is not None:
data["options"]["seed"] = self.seed

try:
response = self._make_request("api/chat", data)
response_time = time.time() - start_time
@@ -295,6 +317,18 @@ def get_backend_type(self) -> str:
"""Get the backend type identifier (for compatibility)."""
return "Ollama"

def get_model_name(self) -> str:
"""Get the model name (for compatibility)."""
return self.model

def is_available(self) -> bool:
"""Check if model is available (for compatibility)."""
return self.is_model_available()

def check_status(self) -> OllamaStatus:
"""Check Ollama status (for compatibility)."""
return self.check_ollama_status()


def test_connection() -> bool | None:
"""Test Ollama connection and model availability"""
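
And the lowest level of the seed path, using OllamaClient directly. This assumes a local Ollama server on the default host and port; the ModelResponse field compared at the end ("content") is an assumption, since the response model itself is not part of this diff.

from src.utils.model_client import OllamaClient

client = OllamaClient(host="localhost", port=11434, model="gpt-oss:20b", seed=123)

# generate() forces temperature to 0.0 and adds options["seed"] = 123 to the
# /api/generate payload, which is what makes repeated runs deterministic on Ollama.
first = client.generate(prompt="2 + 2 = ?")
second = client.generate(prompt="2 + 2 = ?")
print(first.content == second.content)  # "content" is assumed; expected True with a fixed seed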