Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions iris/src/iris/common/memiris_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
MemoryWithRelationsDTO,
OllamaLanguageModel,
)
from memiris.api.llm_config_service import LlmConfigService
from memiris.api.memory_sleep_pipeline import (
MemorySleepPipeline,
MemorySleepPipelineBuilder,
Expand Down Expand Up @@ -49,6 +50,14 @@
In fact the actual content of the conversation is not relevant at all and should not be part of the learnings \
unless they specifically refer to the user.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
STAY ON TOPIC and only extract personal details about the user. Do not extract any information that is not a \
personal detail about the user.
You need to find high-quality personal details that can be used to improve the answers one gives to the user.
It is better to find no personal details than to find low-quality personal details that are not actually useful to \
improve the answers for the user.
AGAIN: LIMITED INTERPRETATION, ONLY PERSONAL DETAILS THAT ARE EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER. \
IF IT IS NOT EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A PERSONAL \
DETAIL. IF NO QUALIFYING PERSONAL DETAILS EXIST, DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

_memiris_user_focus_requirements = """
Expand All @@ -62,6 +71,13 @@
unless they specifically refer to the user.
DO NOT extract how the user is communicating but rather how they expect answers to be communicated to them.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
STAY ON TOPIC and only extract requirements that the user has for answers to their questions.
You need to find high-quality requirements that can be used to improve the answers one gives to the user.
It is better to find no requirements than to find low-quality requirements that are not actually useful to \
improve the answers for the user.
AGAIN: LIMITED INTERPRETATION, ONLY REQUIREMENTS THAT ARE EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER. \
IF IT IS NOT EXPLICITLY STATED OR VERY CLEARLY IMPLIED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A REQUIREMENT. \
IF NO QUALIFYING REQUIREMENTS EXIST, DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

_memiris_user_focus_facts = """
Expand All @@ -74,11 +90,26 @@
In fact the actual content of the conversation is not relevant at all and should not be part of the learnings \
unless they specifically refer to the user.
Keep the learnings short and concise. Better have multiple short learnings than one long learning.
DO NOT extract how the user is communicating but rather what hard facts about the user can be extracted from the \
conversation.
STAY ON TOPIC and only extract facts about the user. Do not extract any information that is not a fact about the user.
You need to find high-quality facts that can be used to improve the answers one gives to the user.
It is better to find no facts than to find low-quality facts that are not actually useful to \
improve the answers for the user.
AGAIN: NO INTERPRETATION, ONLY FACTS THAT ARE EXPLICITLY STATED BY THE USER. DO NOT GUESS OR INFER ANYTHING. \
IF IT IS NOT EXPLICITLY STATED BY THE USER, THEN IT SHOULD NOT BE EXTRACTED AS A FACT. IF NO QUALIFYING FACTS EXIST, \
DO NOT EXTRACT ANYTHING AND RETURN AN EMPTY RESULT.
"""

type Tenant = str


# Configure LLM retry parameters for Memiris to handle transient errors gracefully
LlmConfigService.configure_retry_params(
max_attempts=5, initial_delay=1.0, backoff_factor=2.0
)


def setup_ollama_env_vars() -> None:
llm_manager = LlmManager()
iris_ollama_model: OllamaModel | None = None
Expand Down
34 changes: 33 additions & 1 deletion iris/src/iris/pipeline/chat/exercise_chat_agent_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def is_memiris_memory_creation_enabled(
Returns:
True if memory creation should be enabled, False otherwise.
"""
return False
return bool(state.dto.user and state.dto.user.memiris_enabled)

def get_memiris_tenant(self, dto: ExerciseChatPipelineExecutionDTO) -> str:
"""
Expand Down Expand Up @@ -205,6 +205,8 @@ def get_tools(
setattr(state, "lecture_content_storage", {})
if not hasattr(state, "faq_storage"):
setattr(state, "faq_storage", {})
if not hasattr(state, "accessed_memory_storage"):
setattr(state, "accessed_memory_storage", [])

lecture_content_storage = getattr(state, "lecture_content_storage")
faq_storage = getattr(state, "faq_storage")
Expand Down Expand Up @@ -256,6 +258,26 @@ def get_tools(
)
)

# Add Memiris tools if available
allow_memiris_tool = bool(
state.dto.user
and state.dto.user.memiris_enabled
and state.memiris_wrapper
and state.memiris_wrapper.has_memories()
)

if allow_memiris_tool and state.memiris_wrapper:
tool_list.append(
state.memiris_wrapper.create_tool_memory_search(
getattr(state, "accessed_memory_storage", [])
)
)
tool_list.append(
state.memiris_wrapper.create_tool_find_similar_memories(
getattr(state, "accessed_memory_storage", [])
)
)

return tool_list

def build_system_message(
Expand Down Expand Up @@ -293,6 +315,14 @@ def build_system_message(
custom_instructions=dto.custom_instructions or ""
)

# Get Memiris availability
allow_memiris_tool = bool(
state.dto.user
and state.dto.user.memiris_enabled
and state.memiris_wrapper
and state.memiris_wrapper.has_memories()
)

# Build system prompt using Jinja2 template
template_context = {
"current_date": datetime_to_string(datetime.now(tz=pytz.UTC)),
Expand All @@ -304,6 +334,7 @@ def build_system_message(
"has_query": query is not None,
"has_chat_history": len(state.message_history) > 0,
"custom_instructions": custom_instructions,
"allow_memiris_tool": allow_memiris_tool,
}

return self.system_prompt_template.render(template_context)
Expand Down Expand Up @@ -358,6 +389,7 @@ def post_agent_hook(
"Done!",
final_result=result,
tokens=state.tokens,
accessed_memories=getattr(state, "accessed_memory_storage", []),
session_title=session_title,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,13 @@ You have access to Memiris, an advanced memory system that stores and retrieves
• You do not have writing access to Memiris, so you cannot add new memories or modify existing ones. Do not promise the student that you will remember something for them.
• Memiris will automatically remember details about the conversation you are having with the student. You cannot control this process.
• You must make use of Memiris! Always take your time to check for relevant memories before responding to the student.

Some examples of how to use Memiris:
• Search for a memory about the student's preferred learning style to suggest study strategies.
• Search for a memory about the student's past struggles with a topic to provide encouragement and resources.
• Search for a memory about the student's goals to help them reflect on their progress toward those goals.
• Search for a memory about the student's preferences for receiving feedback to tailor your response accordingly.
• Search for a memory about the student's preferences for receiving answers. For example, if the student has a memory that they prefer concise answers, you can use that information to provide a more concise response. Or if they prefer detailed and realistic examples, you can use that information to provide a more detailed response with realistic examples.
{% else %}
Memiris is not available or not enabled for this course. You MUST NOT attempt to retrieve or reference any memories about the student. Do not make assumptions about the student's past interactions or preferences based on the absence of Memiris.
{% endif %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,28 @@ Scenario 4: Student is asking a general question
2. Since it's a general question, it might not be necessary to use any tools. I can directly provide a response to the student's question.
3. After understanding the question, I should provide a response to the student's question. I can provide syntax examples, hints and guidance, but I do never provide direct solutions.

{# Memiris Block #}
{% if allow_memiris_tool %}
## Memiris - Personal Memory System
You have access to Memiris, an advanced memory system that stores and retrieves information about the student.
• Use Memiris to recall facts, preferences, personal details, and behavior patterns that the student has shared in previous conversations.
• This information can help you personalize your responses and provide more relevant support.
• Memiris provides several tools to interact with it. Use them according to their descriptions.
• You are allowed to provide the student with details about what is stored in Memiris.
• You do not have writing access to Memiris, so you cannot add new memories or modify existing ones. Do not promise the student that you will remember something for them.
• Memiris will automatically remember details about the conversation you are having with the student. You cannot control this process.
• You must make use of Memiris! Always take your time to check for relevant memories before responding to the student.

Some examples of how to use Memiris:
• Search for a memory about the student's preferred learning style to suggest study strategies.
• Search for a memory about the student's past struggles with a topic to provide encouragement and resources.
• Search for a memory about the student's coding preferences or patterns to provide personalized hints.
• Search for a memory about the student's preferences for receiving feedback to tailor your response accordingly.
• Search for a memory about the student's preferences for receiving answers. For example, if the student has a memory that they prefer concise answers, you can use that information to provide a more concise response. Or if they prefer detailed and realistic examples, you can use that information to provide a more detailed response with realistic examples.
{% else %}
Memiris is not available or not enabled for this student. You MUST NOT attempt to retrieve or reference any memories about the student. Do not make assumptions about the student's past interactions or preferences based on the absence of Memiris.
{% endif %}

## Exercise Context
- **Exercise Title:** {{ exercise_title }}
- **Problem Statement:** {{ problem_statement }}
Expand Down
97 changes: 97 additions & 0 deletions memiris/src/memiris/api/llm_config_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""
API-level service for configuring global LLM behaviour.

Example::

from memiris.api.llm_config_service import LlmConfigService
from memiris.llm.retry_config import RetryConfig

LlmConfigService.configure_retry(
RetryConfig(max_attempts=5, initial_delay=2.0, backoff_factor=2.0)
)
"""

from typing import Optional, Tuple, Type

from memiris.llm.retry_config import (
RetryConfig,
get_retry_config,
set_retry_config,
)


class LlmConfigService:
    """Read and update the process-wide LLM configuration.

    Every method is a classmethod, so the service is used without
    creating an instance.
    """

    @classmethod
    def configure_retry(
        cls,
        config: RetryConfig,
    ) -> None:
        """Replace the process-wide LLM retry configuration.

        Parameters
        ----------
        config:
            The new :class:`~memiris.llm.retry_config.RetryConfig` applied
            to every subsequent LLM call in this process.
        """
        set_retry_config(config)

    @classmethod
    def configure_retry_params(
        cls,
        *,
        max_attempts: Optional[int] = None,
        initial_delay: Optional[float] = None,
        backoff_factor: Optional[float] = None,
        exceptions: Optional[Tuple[Type[BaseException], ...]] = None,
    ) -> None:
        """Update individual retry parameters, keeping the rest unchanged.

        Only the keyword arguments that are explicitly provided take
        effect; every other field is copied from the currently active
        configuration.

        Parameters
        ----------
        max_attempts:
            Total number of attempts (1 = no retry).
        initial_delay:
            Seconds to wait before the first retry.
        backoff_factor:
            Multiplier applied to the delay after each failed attempt.
        exceptions:
            Tuple of exception types that should trigger a retry.
        """
        base = get_retry_config()
        # Requested overrides; None means "keep the current setting".
        requested = {
            "max_attempts": max_attempts,
            "initial_delay": initial_delay,
            "backoff_factor": backoff_factor,
            "exceptions": exceptions,
        }
        merged = {
            field: (value if value is not None else getattr(base, field))
            for field, value in requested.items()
        }
        set_retry_config(RetryConfig(**merged))

    @classmethod
    def get_retry_config(cls) -> RetryConfig:
        """Return the currently active process-wide retry configuration."""
        return get_retry_config()

    @classmethod
    def reset_retry_config(cls) -> None:
        """Restore the built-in default retry configuration."""
        set_retry_config(RetryConfig())
21 changes: 12 additions & 9 deletions memiris/src/memiris/llm/ollama_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
WrappedChatResponse,
WrappedEmbeddingResponse,
)
from memiris.llm.retry_config import call_with_retry


@dataclass
Expand Down Expand Up @@ -63,20 +64,22 @@ def chat(
model_parameters=options,
) as generation:
think = "high" if self._model.startswith("gpt-oss") else None
response = self._client.chat(
self._model,
messages=messages,
format=response_format,
keep_alive=keep_alive,
options=options,
think=think, # type: ignore
**kwargs,
response = call_with_retry(
lambda: self._client.chat(
self._model,
messages=messages,
format=response_format,
keep_alive=keep_alive,
options=options,
think=think, # type: ignore
**kwargs,
)
)
generation.update(output=response.message, metadata=response)
return WrappedChatResponse.from_ollama_response(response)

def embed(self, text: str) -> WrappedEmbeddingResponse:
response = self._client.embed(self._model, text)
response = call_with_retry(lambda: self._client.embed(self._model, text))
return WrappedEmbeddingResponse.from_ollama_response(response)

def langchain_client(self) -> ChatOllama:
Expand Down
17 changes: 11 additions & 6 deletions memiris/src/memiris/llm/openai_language_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
WrappedChatResponse,
WrappedEmbeddingResponse,
)
from memiris.llm.retry_config import call_with_retry


class OpenAiLanguageModel(AbstractLanguageModel):
Expand Down Expand Up @@ -207,11 +208,13 @@ def _enforce_strict_schema(s: Any) -> None:
payload.update(kwargs)
payload = {k: v for k, v in payload.items() if v is not None}

resp: ChatCompletion = self._client.chat.completions.create(
model=self._model,
messages=normalized,
response_format=rf or Omit(),
**payload,
resp: ChatCompletion = call_with_retry(
lambda: self._client.chat.completions.create(
model=self._model,
messages=normalized,
response_format=rf or Omit(),
**payload,
)
)

# If we wrapped a non-object schema, unwrap the key so downstream parsers
Expand All @@ -237,7 +240,9 @@ def _enforce_strict_schema(s: Any) -> None:
return WrappedChatResponse.from_openai_chat(resp)

def embed(self, text: str) -> WrappedEmbeddingResponse:
resp = self._client.embeddings.create(model=self._model, input=text)
resp = call_with_retry(
lambda: self._client.embeddings.create(model=self._model, input=text)
)
return WrappedEmbeddingResponse.from_openai_embedding(resp)

def langchain_client(self) -> Union[ChatOpenAI, AzureChatOpenAI]:
Expand Down
Loading
Loading