     get_openrouter_api_key,
     require_api_key,
 )
-from .record import Record, RecordStore, get_env_records_dir
+from .record import Record, RecordStore, capture_caller_context, copy_context, get_env_records_dir
 from .response_adapter import ResponseTypeAdapter
 
 
-def set_rate_limiter_verbose(verbose: bool) -> None:
-    """Enable or disable verbose logging for all rate limiters."""
-    from .clients.anthropic_client import (
-        set_rate_limiter_verbose as set_anthropic_verbose,
-    )
-    from .clients.google_client import set_rate_limiter_verbose as set_gemini_verbose
-    from .clients.mistral_client import set_rate_limiter_verbose as set_mistral_verbose
-    from .clients.openai_client import set_rate_limiter_verbose as set_openai_verbose
-
-    set_anthropic_verbose(verbose)
-    set_gemini_verbose(verbose)
-    set_openai_verbose(verbose)
-    set_mistral_verbose(verbose)
-
-
 class Covenance:
     """LLM client with isolated API keys and call records.
 
@@ -192,18 +177,23 @@ def ask_llm[T](
         sys_msg: str | None = None,
         *,
         max_parsing_retries: int = 2,
+        temperature: float | None = None,
     ) -> T:
-        """Route to appropriate provider and make LLM call.
+        """Route to the appropriate provider and make an LLM call with the given response type.
 
         Args:
             user_msg: User message/prompt
            model: Model name - determines provider routing
            response_type: Type for structured output. Can be:
                - None or str: returns plain text
                - Pydantic model: returns model instance
-                - list[X], tuple[...], etc.: wraps in Pydantic, then unwraps
+                - int, bool, float, list[X], tuple[...]: simple Python types
            sys_msg: Optional system message
            max_parsing_retries: Retries for structured output parsing errors
+            temperature: Sampling temperature. None uses the provider default.
+                Range varies by provider (Anthropic: 0-1, others: 0-2).
+                Note: temperature=0 aims for determinism but doesn't guarantee it
+                due to GPU floating-point non-determinism and backend variability.
        """
        provider = self._get_provider(model)
        client = self._get_client(provider)
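
A minimal usage sketch of the new signature, for orientation. The import path, model string, and Verdict schema are assumptions for illustration only; the constructor arguments mirror the _default_client line further down.

    # All names below except ask_llm's parameters are assumptions: the
    # covenance import path, the model string, and the Verdict schema.
    from pydantic import BaseModel

    from covenance import Covenance, get_env_records_dir  # assumed import path

    class Verdict(BaseModel):
        answer: str
        confidence: float

    client = Covenance(label="example", records_dir=get_env_records_dir())
    verdict = client.ask_llm(
        user_msg="Is this patch backward compatible?",
        model="claude-sonnet-4",  # hypothetical model name
        response_type=Verdict,    # parsed into a Verdict instance
        temperature=0.0,          # low variance, not strictly deterministic
    )
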
@@ -239,6 +229,7 @@ def ask_llm[T](
             model=model,
             client_override=client,
             record_store=self._record_store,
+            temperature=temperature,
         )
         if llm_type not in (None, str):
             try:
@@ -277,6 +268,9 @@ def llm_consensus[T](
             integration_model: Model for integration (defaults to same as model)
             parallel: Whether to make calls in parallel (default: True)
         """
+        # Capture caller info before any calls (especially before spawning threads)
+        capture_caller_context()
+
         if num_candidates == 1:
             return self.ask_llm(
                 user_msg=user_msg,
@@ -302,8 +296,9 @@ def make_candidate_call(call_index: int) -> T:
         candidates: list[T] = []
         if parallel:
             with ThreadPoolExecutor(max_workers=num_candidates) as executor:
+                # Each thread needs its own context copy
                 futures = [
-                    executor.submit(make_candidate_call, i)
+                    executor.submit(copy_context().run, make_candidate_call, i)
                     for i in range(num_candidates)
                 ]
                 for future in as_completed(futures):
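
A self-contained sketch (stdlib only, no project imports) of why copy_context().run is needed here: ThreadPoolExecutor workers start with a fresh, empty context, so ContextVar values set by the caller reach a task only when it runs inside a copy of the submitting thread's context.

    from concurrent.futures import ThreadPoolExecutor
    from contextvars import ContextVar, copy_context

    caller: ContextVar[str] = ContextVar("caller", default="<unset>")

    def task(i: int) -> str:
        return f"task {i} sees caller={caller.get()}"

    caller.set("llm_consensus")
    with ThreadPoolExecutor(max_workers=2) as pool:
        # copy_context() is evaluated in the submitting thread, capturing
        # its ContextVar values before the worker thread takes over.
        futures = [pool.submit(copy_context().run, task, i) for i in range(2)]
        print([f.result() for f in futures])
        # Both report caller=llm_consensus; plain pool.submit(task, i)
        # would report caller=<unset>.
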
@@ -386,24 +381,23 @@ def print_usage(self, title: str | None = None, cost_format: str = "plain") -> None:
 _default_client = Covenance(label="default client", records_dir=get_env_records_dir())
 
 
-def get_default_client() -> Covenance:
-    return _default_client
-
-
 def ask_llm[T](
     user_msg: str,
     model: str,
     response_type: type[T] | None = None,
     sys_msg: str | None = None,
     *,
     max_parsing_retries: int = 2,
+    temperature: float | None = None,
 ) -> T:
+    """See the docstring on Covenance.ask_llm."""
     return _default_client.ask_llm(
         user_msg=user_msg,
         model=model,
         response_type=response_type,
         sys_msg=sys_msg,
         max_parsing_retries=max_parsing_retries,
+        temperature=temperature,
     )
 
 
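
For plain text, a quick sketch of the module-level wrapper, which routes through _default_client; the model string is invented.

    # response_type omitted -> returns plain str; model name is hypothetical.
    text = ask_llm(
        user_msg="One-line summary of optimistic locking?",
        model="gemini-2.5-flash",
        temperature=0.7,
    )
    print(text)
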
@@ -418,6 +412,7 @@ def llm_consensus[T](
     integration_model: str | None = None,
     parallel: bool = True,
 ) -> T:
+    """See the docstring on Covenance.llm_consensus."""
     return _default_client.llm_consensus(
         user_msg=user_msg,
         model=model,
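
Last, an illustrative consensus call. num_candidates, integration_model, and parallel appear in this diff; the prompt and model strings are invented.

    best = llm_consensus(
        user_msg="Summarize the tradeoffs of optimistic locking.",
        model="gpt-5-mini",         # hypothetical candidate model
        num_candidates=3,           # three candidates, integrated into one answer
        integration_model="gpt-5",  # hypothetical integration model
        parallel=True,              # candidates run in a thread pool
    )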