class LLMBackend(ABC):
    """Abstract base class for LLM backends."""

-    def __init__(self, config: dict[str, Any]) -> None:
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
        self.config = config
+        self.seed = seed

    @abstractmethod
    def generate(
@@ -62,15 +63,16 @@ def test_connection(self) -> bool:
class OllamaBackend(LLMBackend):
    """Ollama backend implementation."""

-    def __init__(self, config: dict[str, Any]) -> None:
-        super().__init__(config)
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
+        super().__init__(config, seed)
        # Import here to avoid circular imports
        from src.utils.model_client import OllamaClient

        self.client = OllamaClient(
            host=config.get("host", "localhost"),
            port=config.get("port", 11434),
            model=config.get("model", "gpt-oss:20b"),
+            seed=seed,
        )

    def generate(
@@ -82,6 +84,10 @@ def generate(
        stream: bool = False,
    ) -> ModelResponse:
        """Generate response from Ollama model."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
        return self.client.generate(
            prompt=prompt,
            system_prompt=system_prompt,
@@ -97,6 +103,10 @@ def chat(
        max_tokens: int | None = None,
    ) -> ModelResponse:
        """Multi-turn chat conversation with Ollama."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
        return self.client.chat(
            messages=messages,
            temperature=temperature,
@@ -127,8 +137,8 @@ def pull_model(self) -> bool:
class OpenRouterBackend(LLMBackend):
    """OpenRouter backend implementation."""

-    def __init__(self, config: dict[str, Any]) -> None:
-        super().__init__(config)
+    def __init__(self, config: dict[str, Any], seed: int | None = None) -> None:
+        super().__init__(config, seed)
        import logging

        import openai
@@ -163,6 +173,10 @@ def generate(
        stream: bool = False,
    ) -> ModelResponse:
        """Generate response from OpenRouter model."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
        start_time = time.time()

        messages = []
@@ -171,15 +185,23 @@ def generate(
        messages.append({"role": "user", "content": prompt})

        try:
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                stream=stream,
-                timeout=self.timeout,
-                extra_headers=self._get_headers(),
-            )
+            # Build request parameters
+            request_params = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": temperature,
+                "stream": stream,
+                "timeout": self.timeout,
+                "extra_headers": self._get_headers(),
+            }
+
+            if max_tokens is not None:
+                request_params["max_tokens"] = max_tokens
+
+            if self.seed is not None:
+                request_params["seed"] = self.seed
+
+            response = self.client.chat.completions.create(**request_params)

            response_time = time.time() - start_time

@@ -220,17 +242,29 @@ def chat(
        max_tokens: int | None = None,
    ) -> ModelResponse:
        """Multi-turn chat conversation with OpenRouter."""
+        # For reproducibility, use temperature=0 when seed is set
+        if self.seed is not None:
+            temperature = 0.0
+
        start_time = time.time()

        try:
-            response = self.client.chat.completions.create(
-                model=self.model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens,
-                timeout=self.timeout,
-                extra_headers=self._get_headers(),
-            )
+            # Build request parameters
+            request_params = {
+                "model": self.model,
+                "messages": messages,
+                "temperature": temperature,
+                "timeout": self.timeout,
+                "extra_headers": self._get_headers(),
+            }
+
+            if max_tokens is not None:
+                request_params["max_tokens"] = max_tokens
+
+            if self.seed is not None:
+                request_params["seed"] = self.seed
+
+            response = self.client.chat.completions.create(**request_params)

            response_time = time.time() - start_time

@@ -290,16 +324,16 @@ def list_models(self) -> list[str]:
        return []


-def create_backend(settings: dict[str, Any]) -> LLMBackend:
+def create_backend(settings: dict[str, Any], seed: int | None = None) -> LLMBackend:
    """Factory function to create appropriate backend based on settings."""
    backend_config = settings.get("backend", {})
    provider = backend_config.get("provider", "ollama")

    if provider == "ollama":
        ollama_config = settings.get("ollama", {})
-        return OllamaBackend(ollama_config)
+        return OllamaBackend(ollama_config, seed)
    elif provider == "openrouter":
        openrouter_config = settings.get("openrouter", {})
-        return OpenRouterBackend(openrouter_config)
+        return OpenRouterBackend(openrouter_config, seed)
    else:
        raise ValueError(f"Unsupported backend provider: {provider}")
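
A minimal usage sketch of the seed plumbing added above (the settings values and prompt here are illustrative, not taken from the repository): create_backend forwards the seed to whichever backend the provider selects, and each backend then forces temperature to 0.0 and passes the seed through to the Ollama client or the OpenRouter request.

# Hypothetical usage sketch of the new seed parameter (illustrative values).
settings = {
    "backend": {"provider": "ollama"},  # or "openrouter"
    "ollama": {"host": "localhost", "port": 11434, "model": "gpt-oss:20b"},
}

backend = create_backend(settings, seed=42)    # seed reaches OllamaBackend / OpenRouterBackend
response = backend.generate(
    prompt="Explain the factory pattern.",     # illustrative prompt
    temperature=0.7,                           # overridden to 0.0 because a seed is set
)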