Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions iris/src/iris/common/memiris_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
MemoryWithRelationsDTO,
OllamaLanguageModel,
)
from memiris.api.llm_config_service import LlmConfigService
from memiris.api.memory_sleep_pipeline import (
MemorySleepPipeline,
MemorySleepPipelineBuilder,
Expand Down Expand Up @@ -49,6 +50,14 @@
In fact the actual content of the conversation is not relevant at all and should not be part of the learnings \
unless they specifically refer to the user.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
STAY ON TOPIC and only extract personal details about the user. Do not extract any information that is not a \
personal detail about the user.
You need to find high-quality personal details that can be used to improve the answers one gives to the user.
It is better to find no personal details than to find low-quality personal details that are not actually useful to \
improve the answers for the user.
AGAIN: LIMITED INTERPRETATION, ONLY PERSONAL DETAILS THAT ARE EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER. \
IF IT IS NOT EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A PERSONAL \
DETAIL. IF NO QUALIFYING PERSONAL DETAILS EXIST, DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

_memiris_user_focus_requirements = """
Expand All @@ -62,6 +71,13 @@
unless they specifically refer to the user.
DO NOT extract how the user is communicating but rather how they expect answers to be communicated to them.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
STAY ON TOPIC and only extract requirements that the user has for answers to their questions.
You need to find high-quality requirements that can be used to improve the answers one gives to the user.
It is better to find no requirements than to find low-quality requirements that are not actually useful to \
improve the answers for the user.
AGAIN: LIMITED INTERPRETATION, ONLY REQUIREMENTS THAT ARE EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER. \
IF IT IS NOT EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A REQUIREMENT. \
IF NO QUALIFYING REQUIREMENTS EXIST, DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

_memiris_user_focus_facts = """
Expand All @@ -74,11 +90,26 @@
In fact the actual content of the conversation is not relevant at all and should not be part of the learnings \
unless they specifically refer to the user.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
DO NOT extract how the user is communicating but rather what hard facts about the user can be extracted from the \
conversation.
STAY ON TOPIC and only extract facts about the user. Do not extract any information that is not a fact about the user.
You need to find high-quality facts that can be used to improve the answers one gives to the user.
It is better to find no facts than to find low-quality facts that are not actually useful to \
improve the answers for the user.
AGAIN: NO INTERPRETATION, ONLY FACTS THAT ARE EXPLICITLY STATED BY THE USER. DO NOT GUESS OR INFER ANYTHING. \
IF IT IS NOT EXPLICITLY STATED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A FACT. IF NO QUALIFYING FACTS EXIST, \
DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

type Tenant = str


# Configure LLM retry parameters for Memiris to handle transient errors gracefully
LlmConfigService.configure_retry_params(
max_attempts=5, initial_delay=1.0, backoff_factor=2.0
)


def setup_ollama_env_vars() -> None:
llm_manager = LlmManager()
iris_ollama_model: OllamaModel | None = None
Expand Down
34 changes: 33 additions & 1 deletion iris/src/iris/pipeline/chat/exercise_chat_agent_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def is_memiris_memory_creation_enabled(
Returns:
True if memory creation should be enabled, False otherwise.
"""
return False
return bool(state.dto.user and state.dto.user.memiris_enabled)

def get_memiris_tenant(self, dto: ExerciseChatPipelineExecutionDTO) -> str:
"""
Expand Down Expand Up @@ -205,6 +205,8 @@ def get_tools(
setattr(state, "lecture_content_storage", {})
if not hasattr(state, "faq_storage"):
setattr(state, "faq_storage", {})
if not hasattr(state, "accessed_memory_storage"):
setattr(state, "accessed_memory_storage", [])

lecture_content_storage = getattr(state, "lecture_content_storage")
faq_storage = getattr(state, "faq_storage")
Expand Down Expand Up @@ -256,6 +258,26 @@ def get_tools(
)
)

# Add Memiris tools if available
allow_memiris_tool = bool(
state.dto.user
and state.dto.user.memiris_enabled
and state.memiris_wrapper
and state.memiris_wrapper.has_memories()
)

if allow_memiris_tool and state.memiris_wrapper:
tool_list.append(
state.memiris_wrapper.create_tool_memory_search(
getattr(state, "accessed_memory_storage", [])
)
)
tool_list.append(
state.memiris_wrapper.create_tool_find_similar_memories(
getattr(state, "accessed_memory_storage", [])
)
)

return tool_list

def build_system_message(
Expand Down Expand Up @@ -293,6 +315,14 @@ def build_system_message(
custom_instructions=dto.custom_instructions or ""
)

# Get Memiris availability
allow_memiris_tool = bool(
state.dto.user
and state.dto.user.memiris_enabled
and state.memiris_wrapper
and state.memiris_wrapper.has_memories()
)

# Build system prompt using Jinja2 template
template_context = {
"current_date": datetime_to_string(datetime.now(tz=pytz.UTC)),
Expand All @@ -304,6 +334,7 @@ def build_system_message(
"has_query": query is not None,
"has_chat_history": len(state.message_history) > 0,
"custom_instructions": custom_instructions,
"allow_memiris_tool": allow_memiris_tool,
}

return self.system_prompt_template.render(template_context)
Expand Down Expand Up @@ -358,6 +389,7 @@ def post_agent_hook(
"Done!",
final_result=result,
tokens=state.tokens,
accessed_memories=getattr(state, "accessed_memory_storage", []),
session_title=session_title,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ You have access to Memiris, an advanced memory system that stores and retrieves
• You do not have writing access to Memiris, so you cannot add new memories or modify existing ones. Do not promise the student that you will remember something for them.
• Memiris will automatically remember details about the conversation you are having with the student. You cannot control this process.
• You must make use of Memiris! Always take your time to check for relevant memories before responding to the student.

Some examples of how to use Memiris:
• Search for a memory about the student's preferred learning style to suggest study strategies.
• Search for a memory about the student's past struggles with a topic to provide encouragement and resources.
• Search for a memory about the student's goals to help them reflect on their progress toward those goals.
• Search for a memory about the student's preferences for receiving feedback to tailor your response accordingly.
• Search for a memory about the student's preferences for receiving answers. For example, if the student has a memory that they prefer concise answers, you can use that information to provide a more concise response. Or if they prefer detailed and realistic examples, you can use that information to provide a more detailed response with realistic examples.
{% else %}
Memiris is not available or not enabled for this course. You MUST NOT attempt to retrieve or reference any memories about the student. Do not make assumptions about the student's past interactions or preferences based on the absence of Memiris.
{% endif %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,28 @@ Scenario 4: Student is asking a general question
2. Since it's a general question, it might not be necessary to use any tools. I can directly provide a response to the student's question.
3. After understanding the question, I should provide a response to the student's question. I can provide syntax examples, hints and guidance, but I do never provide direct solutions.

{# Memiris Block #}
{% if allow_memiris_tool %}
## Memiris - Personal Memory System
You have access to Memiris, an advanced memory system that stores and retrieves information about the student.
• Use Memiris to recall facts, preferences, personal details, and behavior patterns that the student has shared in previous conversations.
• This information can help you personalize your responses and provide more relevant support.
• Memiris provides several tools to interact with it. Use them according to their descriptions.
• You are allowed to provide the student with details about what is stored in Memiris.
• You do not have writing access to Memiris, so you cannot add new memories or modify existing ones. Do not promise the student that you will remember something for them.
• Memiris will automatically remember details about the conversation you are having with the student. You cannot control this process.
• You must make use of Memiris! Always take your time to check for relevant memories before responding to the student.

Some examples of how to use Memiris:
• Search for a memory about the student's preferred learning style to suggest study strategies.
• Search for a memory about the student's past struggles with a topic to provide encouragement and resources.
• Search for a memory about the student's coding preferences or patterns to provide personalized hints.
• Search for a memory about the student's preferences for receiving feedback to tailor your response accordingly.
• Search for a memory about the student's preferences for receiving answers. For example, if the student has a memory that they prefer concise answers, you can use that information to provide a more concise response. Or if they prefer detailed and realistic examples, you can use that information to provide a more detailed response with realistic examples.
{% else %}
Memiris is not available or not enabled for this student. You MUST NOT attempt to retrieve or reference any memories about the student. Do not make assumptions about the student's past interactions or preferences based on the absence of Memiris.
{% endif %}

## Exercise Context
- **Exercise Title:** {{ exercise_title }}
- **Problem Statement:** {{ problem_statement }}
Expand Down
97 changes: 97 additions & 0 deletions memiris/src/memiris/api/llm_config_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
API-level service for configuring global LLM behaviour.

Example::

from memiris.api.llm_config_service import LlmConfigService
from memiris.llm.retry_config import RetryConfig

LlmConfigService.configure_retry(
RetryConfig(max_attempts=5, initial_delay=2.0, backoff_factor=2.0)
)
"""

from typing import Optional, Tuple, Type

from memiris.llm.retry_config import (
RetryConfig,
get_retry_config,
set_retry_config,
)


class LlmConfigService:
    """Read and update the process-wide LLM configuration.

    Every method is a classmethod, so the service is used without
    creating an instance.
    """

    @classmethod
    def configure_retry(
        cls,
        config: RetryConfig,
    ) -> None:
        """Replace the process-wide LLM retry configuration.

        Parameters
        ----------
        config:
            The new :class:`~memiris.llm.retry_config.RetryConfig` applied
            to every subsequent LLM call in this process.
        """
        set_retry_config(config)

    @classmethod
    def configure_retry_params(
        cls,
        *,
        max_attempts: Optional[int] = None,
        initial_delay: Optional[float] = None,
        backoff_factor: Optional[float] = None,
        exceptions: Optional[Tuple[Type[BaseException], ...]] = None,
    ) -> None:
        """Update individual retry parameters, keeping the rest unchanged.

        Only the keyword arguments that are explicitly provided take
        effect; every other field is copied from the currently active
        configuration.

        Parameters
        ----------
        max_attempts:
            Total number of attempts (1 = no retry).
        initial_delay:
            Seconds to wait before the first retry.
        backoff_factor:
            Multiplier applied to the delay after each failed attempt.
        exceptions:
            Tuple of exception types that should trigger a retry.
        """
        base = get_retry_config()
        # Requested overrides; None means "keep the current setting".
        requested = {
            "max_attempts": max_attempts,
            "initial_delay": initial_delay,
            "backoff_factor": backoff_factor,
            "exceptions": exceptions,
        }
        merged = {
            field: (value if value is not None else getattr(base, field))
            for field, value in requested.items()
        }
        set_retry_config(RetryConfig(**merged))

    @classmethod
    def get_retry_config(cls) -> RetryConfig:
        """Return the currently active process-wide retry configuration."""
        return get_retry_config()

    @classmethod
    def reset_retry_config(cls) -> None:
        """Restore the built-in default retry configuration."""
        set_retry_config(RetryConfig())
21 changes: 12 additions & 9 deletions memiris/src/memiris/llm/ollama_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
WrappedChatResponse,
WrappedEmbeddingResponse,
)
from memiris.llm.retry_config import call_with_retry


@dataclass
Expand Down Expand Up @@ -63,20 +64,22 @@ def chat(
model_parameters=options,
) as generation:
think = "high" if self._model.startswith("gpt-oss") else None
response = self._client.chat(
self._model,
messages=messages,
format=response_format,
keep_alive=keep_alive,
options=options,
think=think, # type: ignore
**kwargs,
response = call_with_retry(
lambda: self._client.chat(
self._model,
messages=messages,
format=response_format,
keep_alive=keep_alive,
options=options,
think=think, # type: ignore
**kwargs,
)
)
generation.update(output=response.message, metadata=response)
return WrappedChatResponse.from_ollama_response(response)

def embed(self, text: str) -> WrappedEmbeddingResponse:
response = self._client.embed(self._model, text)
response = call_with_retry(lambda: self._client.embed(self._model, text))
return WrappedEmbeddingResponse.from_ollama_response(response)

def langchain_client(self) -> ChatOllama:
Expand Down
17 changes: 11 additions & 6 deletions memiris/src/memiris/llm/openai_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
WrappedChatResponse,
WrappedEmbeddingResponse,
)
from memiris.llm.retry_config import call_with_retry


class OpenAiLanguageModel(AbstractLanguageModel):
Expand Down Expand Up @@ -207,11 +208,13 @@ def _enforce_strict_schema(s: Any) -> None:
payload.update(kwargs)
payload = {k: v for k, v in payload.items() if v is not None}

resp: ChatCompletion = self._client.chat.completions.create(
model=self._model,
messages=normalized,
response_format=rf or Omit(),
**payload,
resp: ChatCompletion = call_with_retry(
lambda: self._client.chat.completions.create(
model=self._model,
messages=normalized,
response_format=rf or Omit(),
**payload,
)
)

# If we wrapped a non-object schema, unwrap the key so downstream parsers
Expand All @@ -237,7 +240,9 @@ def _enforce_strict_schema(s: Any) -> None:
return WrappedChatResponse.from_openai_chat(resp)

def embed(self, text: str) -> WrappedEmbeddingResponse:
resp = self._client.embeddings.create(model=self._model, input=text)
resp = call_with_retry(
lambda: self._client.embeddings.create(model=self._model, input=text)
)
return WrappedEmbeddingResponse.from_openai_embedding(resp)

def langchain_client(self) -> Union[ChatOpenAI, AzureChatOpenAI]:
Expand Down
Loading
Loading