Skip to content

Commit 3d88195

Browse files
authored
Separate Ollama tests and set temperature to 0 (#202)
1 parent 78ad266 commit 3d88195

File tree

11 files changed

+215
-152
lines changed

11 files changed

+215
-152
lines changed

.github/workflows/heavy-tests.yml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,9 +44,8 @@ jobs:
4444
ANTHROPIC_API_TEST_KEY: ${{ secrets.ANTHROPIC_API_TEST_KEY }}
4545
OPEN_AI_API_TEST_KEY: ${{ secrets.OPEN_AI_API_TEST_KEY }}
4646
GEMINI_API_TEST_KEY: ${{ secrets.GEMINI_API_TEST_KEY }}
47-
OLLAMA_IMAGE_URL: ${{ vars.OLLAMA_IMAGE_URL }}
4847
OPEN_ROUTER_API_TEST_KEY: ${{ vars.OPEN_ROUTER_API_TEST_KEY }}
49-
run: ./gradlew jvmIntegrationTest --no-parallel --continue
48+
run: ./gradlew jvmIntegrationTest --continue
5049

5150
- name: Collect reports
5251
if: always()

.github/workflows/ollama-tests.yml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# This workflow uses actions that are not certified by GitHub.
2+
# They are provided by a third-party and are governed by
3+
# separate terms of service, privacy policy, and support
4+
# documentation.
5+
# This workflow will build a Java project with Gradle and cache/restore any dependencies to improve the workflow execution time
6+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-java-with-gradle
7+
8+
name: Ollama Tests
9+
10+
on:
11+
workflow_dispatch: # Manual trigger
12+
push:
13+
branches: [ "main", "develop" ]
14+
15+
jobs:
16+
integration-tests:
17+
18+
runs-on: ${{ matrix.os }}
19+
permissions:
20+
contents: read
21+
22+
strategy:
23+
matrix:
24+
os: [ ubuntu-latest ]
25+
26+
steps:
27+
- name: Configure Git
28+
run: |
29+
git config --global core.autocrlf input
30+
- uses: actions/checkout@v4
31+
- name: Set up JDK 17
32+
uses: actions/setup-java@v4
33+
with:
34+
java-version: '17'
35+
distribution: 'corretto'
36+
37+
# Configure Gradle for optimal use in GitHub Actions, including caching of downloaded dependencies.
38+
# See: https://github.com/gradle/actions/blob/main/setup-gradle/README.md
39+
- name: Setup Gradle
40+
uses: gradle/actions/setup-gradle@af1da67850ed9a4cedd57bfd976089dd991e2582 # v4.0.0
41+
42+
- name: JvmOllamaTest with Gradle Wrapper
43+
env:
44+
OLLAMA_IMAGE_URL: ${{ vars.OLLAMA_IMAGE_URL }}
45+
run: ./gradlew jvmOllamaTest --no-parallel --continue
46+
47+
- name: Collect reports
48+
if: always()
49+
uses: actions/upload-artifact@v4
50+
with:
51+
name: reports-${{ matrix.os }}
52+
path: |
53+
**/build/reports/

buildSrc/src/main/kotlin/ai/grazie/gradle/tests/TestType.kt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@ enum class TestType(
1212
PERFORMANCE("*.prf_*", "prf", parallelism = false),
1313
GPU("*.gpu_*", "gpu", maxHeapForJvm = "2g"),
1414
CLIENT("*.client_*", "client"),
15-
INTEGRATION("*.integration_*", "integration");
15+
INTEGRATION("*.integration_*", "integration"),
16+
OLLAMA("*.ollama_*", "ollama");
1617

1718
companion object {
1819
internal val testTypesWithoutMain = values().asList().minus(DEFAULT)

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/MultipleLLMPromptExecutorIntegrationTest.kt

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ import ai.koog.integration.tests.utils.TestUtils.readTestOpenAIKeyFromEnv
1212
import ai.koog.prompt.dsl.Prompt
1313
import ai.koog.prompt.executor.clients.anthropic.AnthropicLLMClient
1414
import ai.koog.prompt.executor.clients.google.GoogleLLMClient
15+
import ai.koog.prompt.executor.clients.google.GoogleModels
1516
import ai.koog.prompt.executor.clients.openai.OpenAILLMClient
1617
import ai.koog.prompt.executor.llms.MultiLLMPromptExecutor
1718
import ai.koog.prompt.executor.llms.all.DefaultMultiLLMPromptExecutor
@@ -146,6 +147,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
146147
@ParameterizedTest
147148
@MethodSource("openAIModels", "anthropicModels", "googleModels")
148149
fun integration_testToolsWithRequiredParams(model: LLModel) = runTest(timeout = 300.seconds) {
150+
// ToDo remove after fix
151+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
149152
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
150153

151154
val calculatorTool = ToolDescriptor(
@@ -191,6 +194,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
191194
@ParameterizedTest
192195
@MethodSource("openAIModels", "anthropicModels", "googleModels")
193196
fun integration_testToolsWithRequiredOptionalParams(model: LLModel) = runTest(timeout = 300.seconds) {
197+
// ToDo remove after fix
198+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
194199
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
195200

196201
val calculatorTool = ToolDescriptor(
@@ -243,6 +248,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
243248
@ParameterizedTest
244249
@MethodSource("openAIModels", "anthropicModels", "googleModels")
245250
fun integration_testToolsWithOptionalParams(model: LLModel) = runTest(timeout = 300.seconds) {
251+
// ToDo remove after fix
252+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
246253
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
247254

248255
val calculatorTool = ToolDescriptor(
@@ -293,6 +300,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
293300
@ParameterizedTest
294301
@MethodSource("openAIModels", "anthropicModels", "googleModels")
295302
fun integration_testToolsWithNoParams(model: LLModel) = runTest(timeout = 300.seconds) {
303+
// ToDo remove after fix
304+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
296305
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
297306

298307
val calculatorTool = ToolDescriptor(
@@ -328,6 +337,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
328337
@ParameterizedTest
329338
@MethodSource("openAIModels", "anthropicModels", "googleModels")
330339
fun integration_testToolsWithListEnumParams(model: LLModel) = runTest(timeout = 300.seconds) {
340+
// ToDo remove after fix
341+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
331342
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
332343

333344
val colorPickerTool = ToolDescriptor(
@@ -364,6 +375,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
364375
@ParameterizedTest
365376
@MethodSource("openAIModels", "anthropicModels", "googleModels")
366377
fun integration_testToolsWithNestedListParams(model: LLModel) = runTest(timeout = 300.seconds) {
378+
// ToDo remove after fix
379+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
367380
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
368381

369382
val lotteryPickerTool = ToolDescriptor(
@@ -479,6 +492,8 @@ class MultipleLLMPromptExecutorIntegrationTest {
479492
@ParameterizedTest
480493
@MethodSource("openAIModels", "anthropicModels", "googleModels")
481494
fun integration_testToolChoice(model: LLModel) = runTest(timeout = 300.seconds) {
495+
// ToDo remove after fix
496+
assumeTrue(model != GoogleModels.Gemini2_5ProPreview0506, "JBAI-14481")
482497
assumeTrue(model.capabilities.contains(LLMCapability.Tools), "Model $model does not support tools")
483498

484499
val calculatorTool = ToolDescriptor(

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/OllamaAgentIntegrationTest.kt

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,17 +15,15 @@ import ai.koog.integration.tests.utils.TestUtils.runWithRetry
1515
import ai.koog.prompt.dsl.prompt
1616
import ai.koog.prompt.executor.model.PromptExecutor
1717
import ai.koog.prompt.llm.OllamaModels
18+
import ai.koog.prompt.params.LLMParams
1819
import kotlinx.coroutines.test.runTest
19-
import org.junit.jupiter.api.condition.EnabledOnOs
20-
import org.junit.jupiter.api.condition.OS
2120
import org.junit.jupiter.api.extension.ExtendWith
2221
import kotlin.test.Test
2322
import kotlin.test.assertContains
2423
import kotlin.test.assertNotNull
2524
import kotlin.test.assertTrue
2625
import kotlin.time.Duration.Companion.seconds
2726

28-
@EnabledOnOs(OS.LINUX, OS.MAC)
2927
@ExtendWith(OllamaTestFixtureExtension::class)
3028
class OllamaAgentIntegrationTest {
3129
companion object {
@@ -137,7 +135,11 @@ class OllamaAgentIntegrationTest {
137135
return AIAgent(
138136
promptExecutor = executor,
139137
strategy = strategy,
140-
agentConfig = AIAgentConfig(prompt("test-ollama") {}, model, 15),
138+
agentConfig = AIAgentConfig(
139+
prompt("test-ollama", LLMParams(temperature = 0.0)) {},
140+
model,
141+
15
142+
),
141143
toolRegistry = toolRegistry
142144
) {
143145
install(EventHandler) {
@@ -151,7 +153,7 @@ class OllamaAgentIntegrationTest {
151153

152154
onBeforeLLMCall = { prompt, tools ->
153155
val promptText = prompt.messages.joinToString { "${it.role.name}: ${it.content}" }
154-
val toolsText = tools.joinToString{ it.name }
156+
val toolsText = tools.joinToString { it.name }
155157
println("Prompt with tools:\n$promptText\nAvailable tools:\n$toolsText")
156158
promptsAndResponses.add("PROMPT_WITH_TOOLS: $promptText")
157159
}
@@ -170,7 +172,7 @@ class OllamaAgentIntegrationTest {
170172
}
171173

172174
@Test
173-
fun integration_testOllamaAgentClearContext() = runTest(timeout = 600.seconds) {
175+
fun ollama_testAgentClearContext() = runTest(timeout = 600.seconds) {
174176
val strategy = createTestStrategy()
175177
val toolRegistry = createToolRegistry()
176178
val agent = createAgent(executor, strategy, toolRegistry)

integration-tests/src/jvmTest/kotlin/ai/koog/integration/tests/OllamaClientIntegrationTest.kt

Lines changed: 17 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,11 @@ import kotlinx.coroutines.flow.flow
1010
import kotlinx.coroutines.test.runTest
1111
import kotlinx.serialization.Serializable
1212
import org.junit.jupiter.api.Disabled
13-
import org.junit.jupiter.api.condition.EnabledOnOs
14-
import org.junit.jupiter.api.condition.OS
1513
import org.junit.jupiter.api.extension.ExtendWith
1614
import kotlin.test.Test
1715
import kotlin.test.assertTrue
1816
import kotlin.time.Duration.Companion.seconds
1917

20-
@EnabledOnOs(OS.LINUX, OS.MAC)
2118
@ExtendWith(OllamaTestFixtureExtension::class)
2219
class OllamaClientIntegrationTest {
2320
companion object {
@@ -28,7 +25,7 @@ class OllamaClientIntegrationTest {
2825
}
2926

3027
@Test
31-
fun `integration_test execute simple prompt`() = runTest(timeout = 600.seconds) {
28+
fun `ollama_test execute simple prompt`() = runTest(timeout = 600.seconds) {
3229
val prompt = Prompt.build("test") {
3330
system("You are a helpful assistant.")
3431
user("What is the capital of France?")
@@ -41,7 +38,7 @@ class OllamaClientIntegrationTest {
4138
}
4239

4340
@Test
44-
fun `integration_test execute tools with required parameters`() = runTest(timeout = 600.seconds) {
41+
fun `ollama_test execute tools with required parameters`() = runTest(timeout = 600.seconds) {
4542
val searchTool = ToolDescriptor(
4643
name = "search",
4744
description = "Search for information",
@@ -70,7 +67,7 @@ class OllamaClientIntegrationTest {
7067
}
7168

7269
@Test
73-
fun `integration_test execute tools with required and optional parameters`() = runTest(timeout = 600.seconds) {
70+
fun `ollama_test execute tools with required and optional parameters`() = runTest(timeout = 600.seconds) {
7471
val searchTool = ToolDescriptor(
7572
name = "search",
7673
description = "Search for information",
@@ -101,7 +98,7 @@ class OllamaClientIntegrationTest {
10198
}
10299

103100
@Test
104-
fun `integration_test execute tools with optional parameters`() = runTest(timeout = 600.seconds) {
101+
fun `ollama_test execute tools with optional parameters`() = runTest(timeout = 600.seconds) {
105102
val searchTool = ToolDescriptor(
106103
name = "search",
107104
description = "Search for information",
@@ -131,7 +128,7 @@ class OllamaClientIntegrationTest {
131128
}
132129

133130
@Test
134-
fun `integration_test execute tools with no parameters`() = runTest(timeout = 600.seconds) {
131+
fun `ollama_test execute tools with no parameters`() = runTest(timeout = 600.seconds) {
135132
val getTimeTool = ToolDescriptor(
136133
name = "getTime",
137134
description = "Get the current time"
@@ -148,7 +145,7 @@ class OllamaClientIntegrationTest {
148145
}
149146

150147
@Test
151-
fun `integration_test execute tools with int parameter`() = runTest(timeout = 600.seconds) {
148+
fun `ollama_test execute tools with int parameter`() = runTest(timeout = 600.seconds) {
152149
val setLimitTool = ToolDescriptor(
153150
name = "setLimit",
154151
description = "Set the limit",
@@ -172,7 +169,7 @@ class OllamaClientIntegrationTest {
172169
}
173170

174171
@Test
175-
fun `integration_test execute tools with float parameter`() = runTest(timeout = 600.seconds) {
172+
fun `ollama_test execute tools with float parameter`() = runTest(timeout = 600.seconds) {
176173
val printValueTool = ToolDescriptor(
177174
name = "printValue",
178175
description = "Print the value",
@@ -196,7 +193,7 @@ class OllamaClientIntegrationTest {
196193
}
197194

198195
@Test
199-
fun `integration_test execute tools with string parameter`() = runTest(timeout = 600.seconds) {
196+
fun `ollama_test execute tools with string parameter`() = runTest(timeout = 600.seconds) {
200197
val setNameTool = ToolDescriptor(
201198
name = "setName",
202199
description = "Set the name",
@@ -220,7 +217,7 @@ class OllamaClientIntegrationTest {
220217
}
221218

222219
@Test
223-
fun `integration_test execute tools with enum parameter`() = runTest(timeout = 600.seconds) {
220+
fun `ollama_test execute tools with enum parameter`() = runTest(timeout = 600.seconds) {
224221
val setColor = ToolDescriptor(
225222
name = "setColor",
226223
description = "Set the color",
@@ -249,7 +246,7 @@ class OllamaClientIntegrationTest {
249246
}
250247

251248
@Test
252-
fun `integration_test execute tools with serializable enum parameter`() = runTest(timeout = 600.seconds) {
249+
fun `ollama_test execute tools with serializable enum parameter`() = runTest(timeout = 600.seconds) {
253250
val calculatorTool = ToolDescriptor(
254251
name = "calculator",
255252
description = "A simple calculator that can add, subtract, multiply, and divide two numbers.",
@@ -283,7 +280,7 @@ class OllamaClientIntegrationTest {
283280
}
284281

285282
@Test
286-
fun `integration_test execute tools with list of strings parameter`() = runTest(timeout = 600.seconds) {
283+
fun `ollama_test execute tools with list of strings parameter`() = runTest(timeout = 600.seconds) {
287284
val setTags = ToolDescriptor(
288285
name = "setTags",
289286
description = "Set the tags",
@@ -307,7 +304,7 @@ class OllamaClientIntegrationTest {
307304
}
308305

309306
@Test
310-
fun `integration_test execute tools with list of integers parameter`() = runTest(timeout = 600.seconds) {
307+
fun `ollama_test execute tools with list of integers parameter`() = runTest(timeout = 600.seconds) {
311308
val setValues = ToolDescriptor(
312309
name = "setValues",
313310
description = "Set the values",
@@ -331,7 +328,7 @@ class OllamaClientIntegrationTest {
331328
}
332329

333330
@Test
334-
fun `integration_test execute tools with list of floats parameter`() = runTest(timeout = 600.seconds) {
331+
fun `ollama_test execute tools with list of floats parameter`() = runTest(timeout = 600.seconds) {
335332
val setValues = ToolDescriptor(
336333
name = "setValues",
337334
description = "Set the values",
@@ -364,7 +361,7 @@ class OllamaClientIntegrationTest {
364361
}
365362

366363
@Test
367-
fun `integration_test execute tools with list of enums parameter`() = runTest(timeout = 600.seconds) {
364+
fun `ollama_test execute tools with list of enums parameter`() = runTest(timeout = 600.seconds) {
368365
val setTags = ToolDescriptor(
369366
name = "setTags",
370367
description = "Set the tags",
@@ -388,7 +385,7 @@ class OllamaClientIntegrationTest {
388385
}
389386

390387
@Test
391-
fun `integration_test execute tools with list of lists parameter`() = runTest(timeout = 600.seconds) {
388+
fun `ollama_test execute tools with list of lists parameter`() = runTest(timeout = 600.seconds) {
392389
val setTags = ToolDescriptor(
393390
name = "setTags",
394391
description = "Set the tags",
@@ -413,7 +410,7 @@ class OllamaClientIntegrationTest {
413410
}
414411

415412
@Test
416-
fun integration_testStreamingApiWithLargeText() = runTest(timeout = 600.seconds) {
413+
fun ollama_testStreamingApiWithLargeText() = runTest(timeout = 600.seconds) {
417414
val prompt = Prompt.build("test") {
418415
system("You are a helpful assistant.")
419416
user("Write a detailed essay about the history of artificial intelligence, including its origins, major milestones, key figures, and current state. Please make it at least 1000 words.")
@@ -577,7 +574,7 @@ class OllamaClientIntegrationTest {
577574

578575
@Disabled("JBAI-14221")
579576
@Test
580-
fun `integration_test execute streaming API with structured data`() = runTest(timeout = 600.seconds) {
577+
fun `ollama_test execute streaming API with structured data`() = runTest(timeout = 600.seconds) {
581578
val countries = mutableListOf<Country>()
582579
val countryDefinition = markdownCountryDefinition()
583580

0 commit comments

Comments
 (0)