     get_openrouter_api_key,
     require_api_key,
 )
-from .record import Record, RecordStore, get_env_records_dir
+from .record import Record, RecordStore, capture_caller_context, copy_context, get_env_records_dir
 from .response_adapter import ResponseTypeAdapter
 
 
-def set_rate_limiter_verbose(verbose: bool) -> None:
-    """Enable or disable verbose logging for all rate limiters."""
-    from .clients.anthropic_client import (
-        set_rate_limiter_verbose as set_anthropic_verbose,
-    )
-    from .clients.google_client import set_rate_limiter_verbose as set_gemini_verbose
-    from .clients.mistral_client import set_rate_limiter_verbose as set_mistral_verbose
-    from .clients.openai_client import set_rate_limiter_verbose as set_openai_verbose
-
-    set_anthropic_verbose(verbose)
-    set_gemini_verbose(verbose)
-    set_openai_verbose(verbose)
-    set_mistral_verbose(verbose)
-
-
 class Covenance:
     """LLM client with isolated API keys and call records.
 
@@ -192,18 +177,23 @@ def ask_llm[T](
         sys_msg: str | None = None,
         *,
         max_parsing_retries: int = 2,
+        temperature: float | None = None,
     ) -> T:
-        """Route to appropriate provider and make LLM call.
+        """Route to the appropriate provider and make an LLM call with the given response type.
 
         Args:
             user_msg: User message/prompt
            model: Model name - determines provider routing
            response_type: Type for structured output. Can be:
                - None or str: returns plain text
                - Pydantic model: returns model instance
-                - list[X], tuple[...], etc.: wraps in Pydantic, then unwraps
+                - int, bool, float, list[X], tuple[...]: simple Python types
            sys_msg: Optional system message
            max_parsing_retries: Retries for structured output parsing errors
+            temperature: Sampling temperature. None uses the provider default.
+                Range varies by provider (Anthropic: 0-1, others: 0-2).
+                Note: temperature=0 aims for determinism but doesn't guarantee it
+                due to GPU floating-point non-determinism and backend variability.
        """
        provider = self._get_provider(model)
        client = self._get_client(provider)
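
A minimal usage sketch of the new signature, for orientation. The import path, model string, and Verdict schema are assumptions for illustration only; the constructor arguments mirror the _default_client line further down.

    # All names below except ask_llm's parameters are assumptions: the
    # covenance import path, the model string, and the Verdict schema.
    from pydantic import BaseModel

    from covenance import Covenance, get_env_records_dir  # assumed import path

    class Verdict(BaseModel):
        answer: str
        confidence: float

    client = Covenance(label="example", records_dir=get_env_records_dir())
    verdict = client.ask_llm(
        user_msg="Is this patch backward compatible?",
        model="claude-sonnet-4",  # hypothetical model name
        response_type=Verdict,    # parsed into a Verdict instance
        temperature=0.0,          # low variance, not strictly deterministic
    )
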
@@ -239,6 +229,7 @@ def ask_llm[T](
             model=model,
             client_override=client,
             record_store=self._record_store,
+            temperature=temperature,
         )
         if llm_type not in (None, str):
             try:
@@ -277,6 +268,9 @@ def llm_consensus[T](
             integration_model: Model for integration (defaults to same as model)
             parallel: Whether to make calls in parallel (default: True)
         """
+        # Capture caller info before any calls (especially before spawning threads)
+        capture_caller_context()
+
         if num_candidates == 1:
             return self.ask_llm(
                 user_msg=user_msg,
@@ -302,8 +296,9 @@ def make_candidate_call(call_index: int) -> T:
         candidates: list[T] = []
         if parallel:
             with ThreadPoolExecutor(max_workers=num_candidates) as executor:
+                # Each thread needs its own context copy
                 futures = [
-                    executor.submit(make_candidate_call, i)
+                    executor.submit(copy_context().run, make_candidate_call, i)
                     for i in range(num_candidates)
                 ]
                 for future in as_completed(futures):
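
A self-contained sketch (stdlib only, no project imports) of why copy_context().run is needed here: ThreadPoolExecutor workers start with a fresh, empty context, so ContextVar values set by the caller reach a task only when it runs inside a copy of the submitting thread's context.

    from concurrent.futures import ThreadPoolExecutor
    from contextvars import ContextVar, copy_context

    caller: ContextVar[str] = ContextVar("caller", default="<unset>")

    def task(i: int) -> str:
        return f"task {i} sees caller={caller.get()}"

    caller.set("llm_consensus")
    with ThreadPoolExecutor(max_workers=2) as pool:
        # copy_context() is evaluated in the submitting thread, capturing
        # its ContextVar values before the worker thread takes over.
        futures = [pool.submit(copy_context().run, task, i) for i in range(2)]
        print([f.result() for f in futures])
        # Both report caller=llm_consensus; plain pool.submit(task, i)
        # would report caller=<unset>.
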
@@ -386,24 +381,23 @@ def print_usage(self, title: str | None = None, cost_format: str = "plain") -> None:
 _default_client = Covenance(label="default client", records_dir=get_env_records_dir())
 
 
-def get_default_client() -> Covenance:
-    return _default_client
-
-
 def ask_llm[T](
     user_msg: str,
     model: str,
     response_type: type[T] | None = None,
     sys_msg: str | None = None,
     *,
     max_parsing_retries: int = 2,
+    temperature: float | None = None,
 ) -> T:
+    """See the docstring on Covenance.ask_llm."""
     return _default_client.ask_llm(
         user_msg=user_msg,
         model=model,
         response_type=response_type,
         sys_msg=sys_msg,
         max_parsing_retries=max_parsing_retries,
+        temperature=temperature,
     )
 
 
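
For plain text, a quick sketch of the module-level wrapper, which routes through _default_client; the model string is invented.

    # response_type omitted -> returns plain str; model name is hypothetical.
    text = ask_llm(
        user_msg="One-line summary of optimistic locking?",
        model="gemini-2.5-flash",
        temperature=0.7,
    )
    print(text)
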
@@ -418,6 +412,7 @@ def llm_consensus[T](
     integration_model: str | None = None,
     parallel: bool = True,
 ) -> T:
+    """See the docstring on Covenance.llm_consensus."""
     return _default_client.llm_consensus(
         user_msg=user_msg,
         model=model,
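
Last, an illustrative consensus call. num_candidates, integration_model, and parallel appear in this diff; the prompt and model strings are invented.

    best = llm_consensus(
        user_msg="Summarize the tradeoffs of optimistic locking.",
        model="gpt-5-mini",         # hypothetical candidate model
        num_candidates=3,           # three candidates, integrated into one answer
        integration_model="gpt-5",  # hypothetical integration model
        parallel=True,              # candidates run in a thread pool
    )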