
Commit d2b34f2

fixing QWQ model, introduced error handling for error responses in Ollama (#274)
1 parent bedaccc commit d2b34f2

File tree: 7 files changed, +162 −60 lines

examples/src/main/kotlin/ai/koog/agents/example/calculator/Calculator.kt

Lines changed: 1 addition & 52 deletions

@@ -4,10 +4,6 @@ package ai.koog.agents.example.calculator

 import ai.koog.agents.core.agent.AIAgent
 import ai.koog.agents.core.agent.config.AIAgentConfig
-import ai.koog.agents.core.dsl.builder.forwardTo
-import ai.koog.agents.core.dsl.builder.strategy
-import ai.koog.agents.core.dsl.extension.*
-import ai.koog.agents.core.environment.ReceivedToolResult
 import ai.koog.agents.core.tools.Tool
 import ai.koog.agents.core.tools.ToolArgs
 import ai.koog.agents.core.tools.ToolRegistry
@@ -24,10 +20,6 @@ import kotlinx.coroutines.runBlocking
 import kotlin.uuid.ExperimentalUuidApi
 import kotlin.uuid.Uuid

-
-// Example threshold
-private const val MAX_TOKENS_THRESHOLD = 1000
-
 fun main() = runBlocking {
     val executor: PromptExecutor = simpleOpenAIExecutor(ApiKeyService.openAIApiKey)

@@ -39,49 +31,6 @@ fun main() = runBlocking {
         tools(CalculatorTools().asTools())
     }

-    val strategy = strategy("test") {
-        val nodeCallLLM by nodeLLMRequestMultiple()
-        val nodeExecuteToolMultiple by nodeExecuteMultipleTools(parallelTools = true)
-        val nodeSendToolResultMultiple by nodeLLMSendMultipleToolResults()
-        val nodeCompressHistory by nodeLLMCompressHistory<List<ReceivedToolResult>>()
-
-        edge(nodeStart forwardTo nodeCallLLM)
-
-        edge(
-            (nodeCallLLM forwardTo nodeFinish)
-                transformed { it.first() }
-                onAssistantMessage { true }
-        )
-
-        edge(
-            (nodeCallLLM forwardTo nodeExecuteToolMultiple)
-                onMultipleToolCalls { true }
-        )
-
-        edge(
-            (nodeExecuteToolMultiple forwardTo nodeCompressHistory)
-                onCondition { llm.readSession { prompt.latestTokenUsage > MAX_TOKENS_THRESHOLD } }
-        )
-
-        edge(nodeCompressHistory forwardTo nodeSendToolResultMultiple)
-
-        edge(
-            (nodeExecuteToolMultiple forwardTo nodeSendToolResultMultiple)
-                onCondition { llm.readSession { prompt.latestTokenUsage <= MAX_TOKENS_THRESHOLD } }
-        )
-
-        edge(
-            (nodeSendToolResultMultiple forwardTo nodeExecuteToolMultiple)
-                onMultipleToolCalls { true }
-        )
-
-        edge(
-            (nodeSendToolResultMultiple forwardTo nodeFinish)
-                transformed { it.first() }
-                onAssistantMessage { true }
-        )
-    }
-
     // Create agent config with proper prompt
     val agentConfig = AIAgentConfig(
         prompt = prompt("test") {
@@ -94,7 +43,7 @@ fun main() = runBlocking {
     // Create the runner
     val agent = AIAgent(
         promptExecutor = executor,
-        strategy = strategy,
+        strategy = CalculatorStrategy.strategy,
         agentConfig = agentConfig,
         toolRegistry = toolRegistry
     ) {

examples/src/main/kotlin/ai/koog/agents/example/calculator/CalculatorTools.kt

Lines changed: 57 additions & 0 deletions

@@ -1,9 +1,19 @@
 package ai.koog.agents.example.calculator

+import ai.koog.agents.core.dsl.builder.forwardTo
+import ai.koog.agents.core.dsl.builder.strategy
+import ai.koog.agents.core.dsl.extension.nodeExecuteMultipleTools
+import ai.koog.agents.core.dsl.extension.nodeLLMCompressHistory
+import ai.koog.agents.core.dsl.extension.nodeLLMRequestMultiple
+import ai.koog.agents.core.dsl.extension.nodeLLMSendMultipleToolResults
+import ai.koog.agents.core.dsl.extension.onAssistantMessage
+import ai.koog.agents.core.dsl.extension.onMultipleToolCalls
+import ai.koog.agents.core.environment.ReceivedToolResult
 import ai.koog.agents.core.tools.annotations.LLMDescription
 import ai.koog.agents.core.tools.annotations.Tool
 import ai.koog.agents.core.tools.reflect.ToolSet

+@Suppress("unused")
 @LLMDescription("Tools for basic calculator operations")
 class CalculatorTools : ToolSet {

@@ -55,3 +65,50 @@ class CalculatorTools : ToolSet {
         return (a * b).toString()
     }
 }
+
+object CalculatorStrategy {
+    private const val MAX_TOKENS_THRESHOLD = 1000
+
+    val strategy = strategy("test") {
+        val nodeCallLLM by nodeLLMRequestMultiple()
+        val nodeExecuteToolMultiple by nodeExecuteMultipleTools(parallelTools = true)
+        val nodeSendToolResultMultiple by nodeLLMSendMultipleToolResults()
+        val nodeCompressHistory by nodeLLMCompressHistory<List<ReceivedToolResult>>()
+
+        edge(nodeStart forwardTo nodeCallLLM)
+
+        edge(
+            (nodeCallLLM forwardTo nodeFinish)
+                transformed { it.first() }
+                onAssistantMessage { true }
+        )
+
+        edge(
+            (nodeCallLLM forwardTo nodeExecuteToolMultiple)
+                onMultipleToolCalls { true }
+        )
+
+        edge(
+            (nodeExecuteToolMultiple forwardTo nodeCompressHistory)
+                onCondition { llm.readSession { prompt.latestTokenUsage > MAX_TOKENS_THRESHOLD } }
+        )
+
+        edge(nodeCompressHistory forwardTo nodeSendToolResultMultiple)
+
+        edge(
+            (nodeExecuteToolMultiple forwardTo nodeSendToolResultMultiple)
+                onCondition { llm.readSession { prompt.latestTokenUsage <= MAX_TOKENS_THRESHOLD } }
+        )
+
+        edge(
+            (nodeSendToolResultMultiple forwardTo nodeExecuteToolMultiple)
+                onMultipleToolCalls { true }
+        )
+
+        edge(
+            (nodeSendToolResultMultiple forwardTo nodeFinish)
+                transformed { it.first() }
+                onAssistantMessage { true }
+        )
+    }
+}

Lines changed: 65 additions & 0 deletions

@@ -0,0 +1,65 @@
+package ai.koog.agents.example.calculator
+
+import ai.koog.agents.core.agent.AIAgent
+import ai.koog.agents.core.agent.config.AIAgentConfig
+import ai.koog.agents.core.tools.Tool
+import ai.koog.agents.core.tools.ToolArgs
+import ai.koog.agents.core.tools.ToolRegistry
+import ai.koog.agents.core.tools.reflect.asTools
+import ai.koog.agents.ext.tool.AskUser
+import ai.koog.agents.ext.tool.SayToUser
+import ai.koog.agents.features.eventHandler.feature.handleEvents
+import ai.koog.prompt.dsl.prompt
+import ai.koog.prompt.executor.llms.all.simpleOllamaAIExecutor
+import ai.koog.prompt.executor.model.PromptExecutor
+import ai.koog.prompt.llm.OllamaModels
+import kotlinx.coroutines.runBlocking
+import kotlin.uuid.ExperimentalUuidApi
+import kotlin.uuid.Uuid
+
+@OptIn(ExperimentalUuidApi::class)
+fun main() = runBlocking {
+    val executor: PromptExecutor = simpleOllamaAIExecutor()
+
+    // Create tool registry with calculator tools
+    val toolRegistry = ToolRegistry {
+        // Special tool, required with this type of agent.
+        tool(AskUser)
+        tool(SayToUser)
+        tools(CalculatorTools().asTools())
+    }
+
+    // Create agent config with proper prompt
+    val agentConfig = AIAgentConfig(
+        prompt = prompt("test") {
+            system("You are a calculator.")
+        },
+        model = OllamaModels.Meta.LLAMA_3_2,
+        maxAgentIterations = 50
+    )
+
+    val agent = AIAgent(
+        promptExecutor = executor,
+        strategy = CalculatorStrategy.strategy,
+        agentConfig = agentConfig,
+        toolRegistry = toolRegistry
+    ) {
+        handleEvents {
+            onToolCall { tool: Tool<*, *>, toolArgs: ToolArgs ->
+                println("Tool called: tool ${tool.name}, args $toolArgs")
+            }
+
+            onAgentRunError { strategyName: String, sessionUuid: Uuid?, throwable: Throwable ->
+                println("An error occurred: ${throwable.message}\n${throwable.stackTraceToString()}")
+            }
+
+            onAgentFinished { strategyName: String, result: String? ->
+                println("Result: $result")
+            }
+        }
+    }
+
+    runBlocking {
+        agent.run("(10 + 20) * (5 + 5) / (2 - 11)")
+    }
+}

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/OllamaAgentIntegrationTest.kt

Lines changed: 3 additions & 3 deletions

@@ -2,9 +2,6 @@

 package ai.koog.integration.tests

-import ai.koog.integration.tests.tools.AnswerVerificationTool
-import ai.koog.integration.tests.tools.GenericParameterTool
-import ai.koog.integration.tests.tools.GeographyQueryTool
 import ai.koog.agents.core.agent.AIAgent
 import ai.koog.agents.core.agent.config.AIAgentConfig
 import ai.koog.agents.core.agent.entity.AIAgentStrategy
@@ -13,6 +10,9 @@ import ai.koog.agents.core.dsl.builder.strategy
 import ai.koog.agents.core.dsl.extension.*
 import ai.koog.agents.core.tools.ToolRegistry
 import ai.koog.agents.features.eventHandler.feature.EventHandler
+import ai.koog.integration.tests.tools.AnswerVerificationTool
+import ai.koog.integration.tests.tools.GenericParameterTool
+import ai.koog.integration.tests.tools.GeographyQueryTool
 import ai.koog.integration.tests.utils.annotations.Retry
 import ai.koog.prompt.dsl.prompt
 import ai.koog.prompt.executor.model.PromptExecutor

prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/src/commonMain/kotlin/ai/koog/prompt/executor/ollama/client/OllamaClient.kt

Lines changed: 10 additions & 5 deletions

@@ -80,7 +80,7 @@ public class OllamaClient(
     ): List<Message.Response> {
         require(model.provider == LLMProvider.Ollama) { "Model not supported by Ollama" }

-        val response: OllamaChatResponseDTO = client.post(DEFAULT_MESSAGE_PATH) {
+        val response = client.post(DEFAULT_MESSAGE_PATH) {
             setBody(
                 OllamaChatRequestDTO(
                     model = model.id,
@@ -90,13 +90,18 @@ public class OllamaClient(
                 options = prompt.extractOllamaOptions(),
                 stream = false,
             ))
-        }.body<OllamaChatResponseDTO>()
+        }

-        return parseResponse(response, prompt)
+        if (response.status.isSuccess()) {
+            return parseResponse(response.body<OllamaChatResponseDTO>())
+        } else {
+            val errorResponse = response.body<OllamaErrorResponseDTO>()
+            logger.error { "Ollama error: ${errorResponse.error}" }
+            throw RuntimeException("Ollama API error: ${errorResponse.error}")
+        }
     }

-
-    private fun parseResponse(response: OllamaChatResponseDTO, prompt: Prompt): List<Message.Response> {
+    private fun parseResponse(response: OllamaChatResponseDTO): List<Message.Response> {
         val messages = response.message ?: return emptyList()
         val content = messages.content
         val toolCalls = messages.toolCalls ?: emptyList()

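The change above follows the common Ktor pattern of branching on the HTTP status before choosing a deserialization target. Below is a minimal, standalone sketch of that pattern against a local Ollama /api/chat endpoint; ChatRequest and ChatError are simplified stand-ins for the client's internal DTOs (not the real ones), and the snippet assumes ktor-client with an engine, content-negotiation, and kotlinx-serialization-json on the classpath.

import io.ktor.client.HttpClient
import io.ktor.client.call.body
import io.ktor.client.plugins.contentnegotiation.ContentNegotiation
import io.ktor.client.request.post
import io.ktor.client.request.setBody
import io.ktor.client.statement.bodyAsText
import io.ktor.http.ContentType
import io.ktor.http.contentType
import io.ktor.http.isSuccess
import io.ktor.serialization.kotlinx.json.json
import kotlinx.coroutines.runBlocking
import kotlinx.serialization.Serializable

// Simplified stand-ins for the client's internal request/error DTOs (illustration only).
@Serializable
data class ChatRequest(val model: String, val stream: Boolean = false)

@Serializable
data class ChatError(val error: String)

fun main() = runBlocking {
    val client = HttpClient { install(ContentNegotiation) { json() } }

    // Send a deliberately minimal chat request to a local Ollama instance.
    val response = client.post("http://localhost:11434/api/chat") {
        contentType(ContentType.Application.Json)
        setBody(ChatRequest(model = "qwq:32b"))
    }

    // Branch on the HTTP status before deserializing, mirroring the commit:
    // success -> read the chat response, failure -> decode the error body and throw.
    if (response.status.isSuccess()) {
        println(response.bodyAsText())
    } else {
        val error = response.body<ChatError>()
        throw RuntimeException("Ollama API error: ${error.error}")
    }
}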
prompt/prompt-executor/prompt-executor-clients/prompt-executor-ollama-client/src/commonMain/kotlin/ai/koog/prompt/executor/ollama/client/dto/OllamaModels.kt

Lines changed: 6 additions & 0 deletions

@@ -86,6 +86,12 @@ internal data class OllamaChatResponseDTO(
     @SerialName("eval_count") val evalCount: Int? = null
 )

+/**
+ * Error response from the /api/chat endpoint.
+ */
+@Serializable
+internal data class OllamaErrorResponseDTO(val error: String)
+
 /**
  * Represents a request to generate an embedding using a specific model.
  *

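For reference, the payload this DTO models is a single-field JSON object. A small sketch decoding an illustrative error body (the message text is made up, not captured from a real server), using a local stand-in for the internal DTO:

import kotlinx.serialization.Serializable
import kotlinx.serialization.decodeFromString
import kotlinx.serialization.json.Json

// Local stand-in for the internal OllamaErrorResponseDTO, redeclared here only to
// show the payload shape the new DTO models.
@Serializable
data class ErrorResponse(val error: String)

fun main() {
    // Illustrative /api/chat error body (e.g. when the requested model has not been pulled).
    val body = """{"error":"model \"qwq:32b\" not found, try pulling it first"}"""
    val parsed = Json.decodeFromString<ErrorResponse>(body)
    println(parsed.error)
}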
prompt/prompt-llm/src/commonMain/kotlin/ai/koog/prompt/llm/OllamaModels.kt

Lines changed: 20 additions & 0 deletions

@@ -172,6 +172,26 @@ public object OllamaModels
         )
     )

+    /**
+     * Represents the QWQ model with 32 billion parameters.
+     *
+     * This predefined instance of `LLModel` is provided by Alibaba and supports the following capabilities:
+     * - `Temperature`: Allows adjustment of the temperature setting for controlling the randomness in responses.
+     * - `Schema.JSON.Simple`: Supports tasks requiring JSON schema validation and handling in a simplified manner.
+     * - `Tools`: Enables interaction with external tools or functionalities within the model's ecosystem.
+     *
+     * The model is identified by the unique ID "qwq:32b" and categorized under the Ollama provider.
+     */
+    public val QWQ_32B: LLModel = LLModel(
+        provider = LLMProvider.Ollama,
+        id = "qwq:32b",
+        capabilities = listOf(
+            LLMCapability.Temperature,
+            LLMCapability.Schema.JSON.Simple,
+            LLMCapability.Tools
+        )
+    )
+
     /**
      * Represents the `QWQ` language model instance provided by Alibaba with specific capabilities.
      *

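With the model constant in place, the Ollama calculator example above can point at QWQ instead of LLAMA_3_2. A sketch of the config swap; the OllamaModels.Alibaba.QWQ_32B nesting is an assumption inferred from the "provided by Alibaba" KDoc and may need adjusting to wherever the constant is actually declared.

import ai.koog.agents.core.agent.config.AIAgentConfig
import ai.koog.prompt.dsl.prompt
import ai.koog.prompt.llm.OllamaModels

// Sketch: the same agent config as the Ollama example, pointed at the QWQ model
// added in this commit. The Alibaba nesting is assumed, not confirmed by the diff.
val qwqAgentConfig = AIAgentConfig(
    prompt = prompt("test") {
        system("You are a calculator.")
    },
    model = OllamaModels.Alibaba.QWQ_32B,
    maxAgentIterations = 50
)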
0 commit comments

Comments
 (0)