Skip to content

Commit 87f7e8a

Browse files
fix(tests): fix completion test with logging and increased max tokens (#1137)
1 parent c0feec0 commit 87f7e8a

1 file changed

Lines changed: 5 additions & 1 deletion

File tree

tests/llama_stack/inference/test_completions.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
 import pytest
+from simple_logger.logger import get_logger
 from llama_stack_client import LlamaStackClient
 from tests.llama_stack.constants import ModelInfo

+LOGGER = get_logger(name=__name__)
+

 @pytest.mark.parametrize(
     "unprivileged_model_namespace",
@@ -51,11 +54,12 @@ def test_inference_completion(
 ) -> None:
     """Test text completion functionality with a geography question."""
     response = unprivileged_llama_stack_client.completions.create(
-        model=llama_stack_models.model_id, prompt="What is the capital of Catalonia?", max_tokens=7, temperature=0
+        model=llama_stack_models.model_id, prompt="What is the capital of Catalonia?", max_tokens=20, temperature=0
     )
     assert len(response.choices) > 0, "No response after basic inference on llama-stack server"

     # Check if response has the expected structure and content
     content = response.choices[0].text.lower()
     assert content is not None, "LLM response content is None"
+    LOGGER.info(f"LLM response content for test_inference_completion: {content}")
     assert "barcelona" in content, "The LLM didn't provide the expected answer to the prompt"

Comments (0)