feat: Delete deprecated agents tests (#923)

jgarciao · web-flow · commit d6df7042f490 · 2025-12-11T02:25:50.000+05:30
* feat: Delete deprecated agents tests

The agents API is deprecated in llama-stack 0.3.x and will be removed in
0.4.x.

Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>

* fix: delete unused method validate_agents_responses

Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>

---------

Signed-off-by: Jorge Garcia Oncins <jgarciao@redhat.com>
diff --git a/tests/llama_stack/README.md b/tests/llama_stack/README.md
@@ -8,7 +8,6 @@ The folder structure is based on the upstream Llama Stack integration tests, ava
 
 ### Current Test Suites
 
-- **`agents/`** - Agent functionality tests
 - **`eval/`** - Evaluation provider tests (LM Eval)
 - **`inference/`** - Inference functionality tests
 - **`models/`** - Model management and catalog tests
diff --git a/tests/llama_stack/agents/test_agents_deprecated.py b/tests/llama_stack/agents/test_agents_deprecated.py
diff --git a/tests/llama_stack/utils.py b/tests/llama_stack/utils.py
@@ -17,11 +17,9 @@
     TurnExpectation,
     ModelInfo,
     ValidationResult,
-    TurnResult,
     LLS_CORE_POD_FILTER,
 )
 
-from llama_stack_client import Agent, AgentEventLogger
 import tempfile
 import requests
 
@@ -156,161 +154,6 @@ def _response_fn(*, question: str) -> str:
     return _response_fn
 
 
-def extract_event_content(event: Any) -> str:
-    """Extract content from various event types."""
-    for attr in ["content", "message", "text"]:
-        if hasattr(event, attr) and getattr(event, attr):
-            return str(getattr(event, attr))
-    return ""
-
-
-def validate_rag_agent_responses(
-    rag_agent: Agent,
-    session_id: str,
-    turns_with_expectations: List[TurnExpectation],
-    stream: bool = True,
-    verbose: bool = True,
-    min_keywords_required: int = 1,
-    print_events: bool = False,
-) -> ValidationResult:
-    """
-    Validate RAG agent responses against expected keywords.
-
-    Tests multiple questions and validates that responses contain expected keywords.
-    Returns validation results with success status and detailed results for each turn.
-    """
-
-    all_results = []
-    total_turns = len(turns_with_expectations)
-    successful_turns = 0
-
-    for turn_idx, turn_data in enumerate(turns_with_expectations, 1):
-        question = turn_data["question"]
-        expected_keywords = turn_data["expected_keywords"]
-        description = turn_data.get("description", "")
-
-        if verbose:
-            LOGGER.info(f"[{turn_idx}/{total_turns}] Processing: {question}")
-            if description:
-                LOGGER.info(f"Expected: {description}")
-
-        # Collect response content for validation
-        response_content = ""
-        event_count = 0
-
-        try:
-            # Create turn with the agent
-            turn_response = rag_agent.create_turn(
-                messages=[{"role": "user", "content": question}],
-                session_id=session_id,
-                stream=stream,
-            )
-
-            if stream:
-                for event in AgentEventLogger().log(turn_response):
-                    if print_events:
-                        event.print()
-                    event_count += 1
-
-                    # Extract content from different event types
-                    response_content += extract_event_content(event)
-            else:
-                response_content = turn_response.output_text
-
-            # Validate response content
-            response_lower = response_content.lower()
-            found_keywords = []
-            missing_keywords = []
-
-            for keyword in expected_keywords:
-                if keyword.lower() in response_lower:
-                    found_keywords.append(keyword)
-                else:
-                    missing_keywords.append(keyword)
-
-            # Determine if this turn was successful
-            if stream:
-                turn_successful = (
-                    event_count > 0 and len(response_content) > 0 and len(found_keywords) >= min_keywords_required
-                )
-            else:
-                turn_successful = len(response_content) > 0 and len(found_keywords) >= min_keywords_required
-
-            if turn_successful:
-                successful_turns += 1
-
-            # Store results for this turn
-            turn_result = {
-                "question": question,
-                "description": description,
-                "expected_keywords": expected_keywords,
-                "found_keywords": found_keywords,
-                "missing_keywords": missing_keywords,
-                "response_content": response_content,
-                "response_length": len(response_content),
-                "event_count": event_count,
-                "success": turn_successful,
-                "error": None,
-            }
-
-            all_results.append(turn_result)
-
-            if verbose:
-                LOGGER.info(f"Response length: {len(response_content)}")
-                LOGGER.info(f"Events processed: {event_count}")
-                LOGGER.info(f"Found keywords: {found_keywords}")
-
-                if missing_keywords:
-                    LOGGER.warning(f"Missing expected keywords: {missing_keywords}")
-
-                if turn_successful:
-                    LOGGER.info(f"✓ Successfully validated response for: {question}")
-                else:
-                    LOGGER.error(f"✗ Validation failed for: {question}")
-
-                if turn_idx < total_turns:  # Don't print separator after last turn
-                    LOGGER.info("-" * 50)
-
-        except Exception as exc:
-            LOGGER.exception("Error processing turn %s", question)
-            turn_result = {
-                "question": question,
-                "description": description,
-                "expected_keywords": expected_keywords,
-                "found_keywords": [],
-                "missing_keywords": expected_keywords,
-                "response_content": "",
-                "response_length": 0,
-                "event_count": 0,
-                "success": False,
-                "error": str(exc),
-            }
-            all_results.append(turn_result)
-
-    # Generate summary
-    summary = {
-        "total_turns": total_turns,
-        "successful_turns": successful_turns,
-        "failed_turns": total_turns - successful_turns,
-        "success_rate": successful_turns / total_turns if total_turns > 0 else 0,
-        "total_events": sum(cast(TurnResult, result)["event_count"] for result in all_results),
-        "total_response_length": sum(cast(TurnResult, result)["response_length"] for result in all_results),
-    }
-
-    overall_success = successful_turns == total_turns
-
-    if verbose:
-        LOGGER.info("=" * 60)
-        LOGGER.info("VALIDATION SUMMARY:")
-        LOGGER.info(f"Total turns: {summary['total_turns']}")
-        LOGGER.info(f"Successful: {summary['successful_turns']}")
-        LOGGER.info(f"Failed: {summary['failed_turns']}")
-        LOGGER.info(f"Success rate: {summary['success_rate']:.1%}")
-        LOGGER.info(f"Overall result: {'✓ PASSED' if overall_success else '✗ FAILED'}")
-
-    return cast(ValidationResult, {"success": overall_success, "results": all_results, "summary": summary})
-
-
 def validate_api_responses(
     response_fn: Callable[..., str],
     test_cases: List[TurnExpectation],