
Commit 936b57f
Merge branch 'main' into gpu-skip
2 parents 1dd29d8 + 61a4b25

File tree: 12 files changed (+223, −131 lines)

pyproject.toml

Lines changed: 1 addition & 1 deletion

@@ -70,7 +70,7 @@ dependencies = [
     "marshmallow==3.26.2,<4", # this version is needed for pytest-jira
     "pytest-html>=4.1.1",
     "fire",
-    "llama_stack_client>=0.3.0,<0.4",
+    "llama_stack_client>=0.4.0,<0.5",
     "pytest-xdist==3.8.0",
     "dictdiffer>=0.9.0",
     "pytest>=9.0.0",

tests/llama_stack/conftest.py

Lines changed: 9 additions & 5 deletions

@@ -652,7 +652,7 @@ def llama_stack_models(unprivileged_llama_stack_client: LlamaStackClient) -> Mod
     """
     models = unprivileged_llama_stack_client.models.list()

-    model_id = next(m for m in models if m.api_model_type == "llm").identifier
+    model_id = next(m for m in models if m.custom_metadata["model_type"] == "llm").id

     # Ensure getting the right embedding model depending on the available providers
     providers = unprivileged_llama_stack_client.providers.list()
@@ -664,11 +664,15 @@ def llama_stack_models(unprivileged_llama_stack_client: LlamaStackClient) -> Mod
     else:
         raise ValueError("No embedding provider found")

-    embedding_model = next(m for m in models if m.api_model_type == "embedding" and m.provider_id == target_provider_id)
-    embedding_dimension = float(embedding_model.metadata["embedding_dimension"])
+    embedding_model = next(
+        m
+        for m in models
+        if m.custom_metadata["model_type"] == "embedding" and m.custom_metadata["provider_id"] == target_provider_id
+    )
+    embedding_dimension = int(embedding_model.custom_metadata["embedding_dimension"])

     LOGGER.info(f"Detected model: {model_id}")
-    LOGGER.info(f"Detected embedding_model: {embedding_model.identifier}")
+    LOGGER.info(f"Detected embedding_model: {embedding_model.id}")
     LOGGER.info(f"Detected embedding_dimension: {embedding_dimension}")

     return ModelInfo(model_id=model_id, embedding_model=embedding_model, embedding_dimension=embedding_dimension)
@@ -700,7 +704,7 @@ def vector_store(
     vector_store = unprivileged_llama_stack_client.vector_stores.create(
         name="test_vector_store",
         extra_body={
-            "embedding_model": llama_stack_models.embedding_model.identifier,
+            "embedding_model": llama_stack_models.embedding_model.id,
             "embedding_dimension": llama_stack_models.embedding_dimension,
             "provider_id": vector_io_provider,
         },
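Note: the recurring pattern in this commit is that llama_stack_client 0.4 moves registry fields (model_type, provider_id, embedding_dimension) from top-level attributes and metadata into custom_metadata, and renames identifier to id on listed models. A minimal sketch of the new discovery pattern, assuming a reachable endpoint (the base_url is a placeholder):

# Illustrative only: model discovery under llama_stack_client>=0.4.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL
models = client.models.list()

# model_type and provider_id now live under custom_metadata.
llm = next(m for m in models if m.custom_metadata["model_type"] == "llm")
embedding = next(m for m in models if m.custom_metadata["model_type"] == "embedding")
print(llm.id, embedding.custom_metadata["embedding_dimension"])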

tests/llama_stack/constants.py

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@ class ModelInfo(NamedTuple):

     model_id: str
     embedding_model: Model
-    embedding_dimension: float  # API returns float (e.g., 768.0) despite being conceptually an integer
+    embedding_dimension: int  # API returns integer (e.g., 768)


 LLS_CORE_POD_FILTER: str = "app=llama-stack"

tests/llama_stack/inference/test_embeddings.py

Lines changed: 4 additions & 4 deletions

@@ -50,7 +50,7 @@ def test_inference_embeddings(

     # Embed single input text with encoding_format=float (the returned embedding item is a list of floats)
     embeddings_response = unprivileged_llama_stack_client.embeddings.create(
-        model=llama_stack_models.embedding_model.identifier,
+        model=llama_stack_models.embedding_model.id,
         input="The food was delicious and the waiter...",
         encoding_format="float",
     )
@@ -63,7 +63,7 @@ def test_inference_embeddings(
     # Embed single input text with encoding_format=base64 (the returned embedding item is
     # a single base64-encoded string)
     embeddings_response = unprivileged_llama_stack_client.embeddings.create(
-        model=llama_stack_models.embedding_model.identifier,
+        model=llama_stack_models.embedding_model.id,
         input="The food was delicious and the waiter...",
         encoding_format="base64",
     )
@@ -74,7 +74,7 @@ def test_inference_embeddings(
     # Embed multiple input sets with encoding_format=float (each returned embedding item is a list of floats)
     input_list = ["Input text 1", "Input text 1", "Input text 1"]
     embeddings_response = unprivileged_llama_stack_client.embeddings.create(
-        model=llama_stack_models.embedding_model.identifier, input=input_list, encoding_format="float"
+        model=llama_stack_models.embedding_model.id, input=input_list, encoding_format="float"
     )
     assert isinstance(embeddings_response, CreateEmbeddingsResponse)
     assert len(embeddings_response.data) == len(input_list)
@@ -86,7 +86,7 @@ def test_inference_embeddings(
     # Embed multiple input sets with base64 encoding format (each returned embedding a single base64-encoded string)
     input_list = ["Input text 1", "Input text 1", "Input text 1"]
     embeddings_response = unprivileged_llama_stack_client.embeddings.create(
-        model=llama_stack_models.embedding_model.identifier, input=input_list, encoding_format="base64"
+        model=llama_stack_models.embedding_model.id, input=input_list, encoding_format="base64"
    )
     assert isinstance(embeddings_response, CreateEmbeddingsResponse)
     assert len(embeddings_response.data) == len(input_list)
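Note: with encoding_format="base64" each embedding item arrives as one packed string rather than a list of floats. A minimal sketch of decoding it, assuming the usual little-endian float32 packing (standard library only; names are illustrative):

# Illustrative helper: decode a base64-encoded embedding back into floats.
import base64
import struct

def decode_embedding(b64_item: str) -> list[float]:
    raw = base64.b64decode(b64_item)
    # Assumes little-endian float32 packing, 4 bytes per value.
    return list(struct.unpack(f"<{len(raw) // 4}f", raw))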
Lines changed: 27 additions & 102 deletions

@@ -1,8 +1,6 @@
 import pytest
-import os
-from tests.llama_stack.constants import LlamaStackProviders
 from llama_stack_client import LlamaStackClient, NotFoundError
-from llama_stack_client.types import Model
+from llama_stack_client.types import Model, ModelRetrieveResponse


 @pytest.mark.parametrize(
@@ -39,24 +37,25 @@ def test_models_list(
         assert isinstance(models, list), "models.list() should return a list"
         assert len(models) > 0, "At least one model should be available"

-        llm_model = next((model for model in models if model.api_model_type == "llm"), None)
+        llm_model = next((model for model in models if model.custom_metadata["model_type"] == "llm"), None)
         assert llm_model is not None, "No LLM model found in available models"
         assert isinstance(llm_model, Model), "LLM model should be a Model instance"
-        assert llm_model.identifier is not None, "No identifier set in LLM model"
-        assert len(llm_model.identifier) > 0, "LLM model identifier should not be empty"
+        assert llm_model.id is not None, "No identifier set in LLM model"
+        assert len(llm_model.id) > 0, "LLM model identifier should not be empty"

-        embedding_model = next((model for model in models if model.api_model_type == "embedding"), None)
+        embedding_model = next((model for model in models if model.custom_metadata["model_type"] == "embedding"), None)
         assert embedding_model is not None, "No embedding model found in available models"
         assert isinstance(embedding_model, Model), "Embedding model should be a Model instance"
-        assert embedding_model.identifier is not None, "No identifier set in embedding model"
-        assert len(embedding_model.identifier) > 0, "Embedding model identifier should not be empty"
-        assert "embedding_dimension" in embedding_model.metadata, "embedding_dimension not found in model metadata"
-        embedding_dimension = embedding_model.metadata["embedding_dimension"]
+        assert embedding_model.id is not None, "No identifier set in embedding model"
+        assert len(embedding_model.id) > 0, "Embedding model identifier should not be empty"
+        assert "embedding_dimension" in embedding_model.custom_metadata, (
+            "embedding_dimension not found in custom_metadata"
+        )
+        embedding_dimension = embedding_model.custom_metadata["embedding_dimension"]
         assert embedding_dimension is not None, "No embedding_dimension set in embedding model"
-        # API returns dimension as float (e.g., 768.0) though conceptually an integer
-        assert isinstance(embedding_dimension, float), "embedding_dimension should be a float"
+        # API returns dimension as integer (e.g., 768)
+        assert isinstance(embedding_dimension, int), "embedding_dimension should be an integer"
         assert embedding_dimension > 0, "embedding_dimension should be positive"
-        assert embedding_dimension.is_integer(), "embedding_dimension should be a whole number"

     def test_models_list_structure(
         self,
@@ -71,15 +70,14 @@ def test_models_list_structure(
         assert models is not None, "No models returned from LlamaStackClient"

         for model in models:
-            assert hasattr(model, "identifier"), "Model should have identifier attribute"
-            assert hasattr(model, "api_model_type"), "Model should have api_model_type attribute"
-            assert model.identifier is not None, f"Model {model} should have a non-None identifier"
-            assert model.api_model_type in ["llm", "embedding"], (
-                f"Model {model.identifier} should have api_model_type 'llm' or 'embedding', "
-                f"got '{model.api_model_type}'"
+            assert hasattr(model, "id"), "Model should have identifier attribute"
+            assert hasattr(model, "custom_metadata"), "Model should have custom_metadata attribute"
+            assert isinstance(model.custom_metadata, dict), "Model custom_metadata should be a dictionary"
+            assert model.id is not None, f"Model {model} should have a non-None identifier"
+            assert model.custom_metadata["model_type"] in ["llm", "embedding"], (
+                f"Model {model.id} should have custom_metadata[\"model_type\"] 'llm' or 'embedding', "
+                f"got '{model.custom_metadata['model_type']}'"
             )
-            assert hasattr(model, "metadata"), "Model should have metadata attribute"
-            assert isinstance(model.metadata, dict), "Model metadata should be a dictionary"

     def test_models_retrieve_existing(
         self,
@@ -94,17 +92,16 @@ def test_models_retrieve_existing(
         assert len(models) > 0, "At least one model should be available"

         test_model = models[0]
-        retrieved_model = unprivileged_llama_stack_client.models.retrieve(model_id=test_model.identifier)
+        retrieved_model = unprivileged_llama_stack_client.models.retrieve(model_id=test_model.id)

-        assert retrieved_model is not None, f"Model {test_model.identifier} should be retrievable"
-        assert isinstance(retrieved_model, Model), "Retrieved model should be a Model instance"
-        assert retrieved_model.identifier == test_model.identifier, (
-            f"Retrieved model identifier '{retrieved_model.identifier}' "
-            f"should match requested '{test_model.identifier}'"
+        assert retrieved_model is not None, f"Model {test_model.id} should be retrievable"
+        assert isinstance(retrieved_model, ModelRetrieveResponse), "Retrieved model should be a ModelRetrieveResponse"
+        assert retrieved_model.identifier == test_model.id, (
+            f"Retrieved model identifier '{retrieved_model.identifier}' should match requested '{test_model.id}'"
         )
-        assert retrieved_model.api_model_type == test_model.api_model_type, (
+        assert retrieved_model.api_model_type == test_model.custom_metadata["model_type"], (
             f"Retrieved model type '{retrieved_model.api_model_type}' "
-            f"should match original '{test_model.api_model_type}'"
+            f"should match original '{test_model.custom_metadata['model_type']}'"
         )

     def test_models_retrieve_nonexistent(
@@ -120,75 +117,3 @@ def test_models_retrieve_nonexistent(

         with pytest.raises(NotFoundError):
             unprivileged_llama_stack_client.models.retrieve(model_id=nonexistent_model_id)
-
-    def test_models_register(
-        self,
-        unprivileged_llama_stack_client: LlamaStackClient,
-    ) -> None:
-        """Test registering a new model.
-
-        Verifies that models.register() successfully registers a new model
-        and it appears in the models list.
-        """
-        inference_model = os.getenv("LLS_CORE_INFERENCE_MODEL")
-        assert inference_model, "LLS_CORE_INFERENCE_MODEL environment variable must be set"
-        test_model_id = f"{inference_model}-test-register"
-
-        response = unprivileged_llama_stack_client.models.register(
-            model_id=test_model_id,
-            model_type="llm",
-            provider_id=LlamaStackProviders.Inference.VLLM_INFERENCE,
-        )
-        assert response is not None, "Model registration should return a response"
-
-        registered_model_id = f"{LlamaStackProviders.Inference.VLLM_INFERENCE.value}/{test_model_id}"
-        try:
-            models = unprivileged_llama_stack_client.models.list()
-            registered_model_ids = [model.identifier for model in models]
-            assert registered_model_id in registered_model_ids, (
-                f"Registered model {registered_model_id} should appear in models list"
-            )
-        finally:
-            unprivileged_llama_stack_client.models.unregister(model_id=registered_model_id)
-
-    def test_models_register_retrieve_unregister(
-        self,
-        unprivileged_llama_stack_client: LlamaStackClient,
-    ) -> None:
-        """Test complete model lifecycle: register, retrieve, and unregister.
-
-        Verifies the full workflow of registering a model, retrieving it,
-        verifying its properties, and then unregistering it.
-        """
-        inference_model = os.getenv("LLS_CORE_INFERENCE_MODEL")
-        assert inference_model, "LLS_CORE_INFERENCE_MODEL environment variable must be set"
-        test_model_id = f"{inference_model}-test-lifecycle"
-
-        response = unprivileged_llama_stack_client.models.register(
-            model_id=test_model_id,
-            model_type="llm",
-            provider_id=LlamaStackProviders.Inference.VLLM_INFERENCE,
-        )
-        assert response is not None, "Model registration should return a response"
-
-        registered_model_id = f"{LlamaStackProviders.Inference.VLLM_INFERENCE.value}/{test_model_id}"
-        try:
-            registered_model = unprivileged_llama_stack_client.models.retrieve(model_id=registered_model_id)
-            assert registered_model is not None, f"LLM {registered_model_id} not found using models.retrieve"
-            assert isinstance(registered_model, Model), "Retrieved model should be a Model instance"
-            expected_id_suffix = f"/{test_model_id}"
-            assert registered_model.identifier.endswith(expected_id_suffix), (
-                f"Model identifier '{registered_model.identifier}' should end with '{expected_id_suffix}'"
-            )
-            assert registered_model.api_model_type == "llm", (
-                f"Registered model should have api_model_type 'llm', got '{registered_model.api_model_type}'"
-            )
-            assert registered_model.provider_id == LlamaStackProviders.Inference.VLLM_INFERENCE.value, (
-                f"Registered model provider_id should be '{LlamaStackProviders.Inference.VLLM_INFERENCE.value}', "
-                f"got '{registered_model.provider_id}'"
-            )
-        finally:
-            unprivileged_llama_stack_client.models.unregister(model_id=registered_model_id)
-
-        with pytest.raises(NotFoundError):
-            unprivileged_llama_stack_client.models.retrieve(model_id=registered_model_id)
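Note: the retrieve path keeps the older field names (identifier, api_model_type) on ModelRetrieveResponse even though list() items now use id and custom_metadata, which is why the assertions above compare across the two shapes. A minimal sketch of the nonexistent-model behavior the remaining test relies on (placeholder URL and id):

# Illustrative only: models.retrieve raises NotFoundError for unknown ids.
from llama_stack_client import LlamaStackClient, NotFoundError

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL
try:
    model = client.models.retrieve(model_id="does-not-exist")
except NotFoundError:
    model = None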

tests/llama_stack/safety/test_trustyai_fms_provider.py

Lines changed: 4 additions & 5 deletions

@@ -1,6 +1,5 @@
 import pytest
 import yaml
-from llama_stack_client.types.chat.completion_create_params import MessageOpenAIUserMessageParam
 from simple_logger.logger import get_logger

 from tests.llama_stack.constants import LlamaStackProviders
@@ -88,10 +87,10 @@ def test_fms_guardrails_run_shield(self, minio_pod, minio_data_connection, llama
         run_shields_response = llama_stack_client.safety.run_shield(
             shield_id=SECURE_SHIELD_ID,
             messages=[
-                MessageOpenAIUserMessageParam(
-                    content="My email is johndoe@example.com",
-                    role="user",
-                )
+                {
+                    "content": "My email is johndoe@example.com",
+                    "role": "user",
+                }
             ],
             params={},
         )
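Note: the typed message param import is dropped because the 0.4 client accepts plain dict messages of the same shape, as the hunk above shows. A minimal sketch, reusing the fixture and shield id from the surrounding test:

# Illustrative only: run_shield with a plain dict message.
response = llama_stack_client.safety.run_shield(
    shield_id=SECURE_SHIELD_ID,
    messages=[{"role": "user", "content": "My email is johndoe@example.com"}],
    params={},
)
assert response is not None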

tests/model_registry/conftest.py

Lines changed: 0 additions & 3 deletions

@@ -1,4 +1,3 @@
-import time
 from contextlib import ExitStack
 import pytest
 from pytest import Config, FixtureRequest
@@ -318,8 +317,6 @@ def model_registry_instance(
     wait_for_pods_running(
         admin_client=admin_client, namespace_name=model_registry_namespace, number_of_consecutive_checks=6
     )
-    # TODO remove when RHOAIENG-41728 is addressed
-    time.sleep(60.0)  # noqa: FCN001
     yield mr_instances
     if db_name == "default":
         wait_for_default_resource_cleanedup(admin_client=admin_client, namespace_name=model_registry_namespace)
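Note: the fixed 60-second sleep is gone; stability now rests on wait_for_pods_running requiring six consecutive passing checks. That helper lives elsewhere in this repo; a minimal sketch of the consecutive-checks idea it names, for context only:

# Illustrative only: require N consecutive successful checks instead of a fixed sleep.
import time
from typing import Callable

def wait_consecutive(check: Callable[[], bool], needed: int = 6, interval: float = 5.0, timeout: float = 300.0) -> None:
    deadline = time.monotonic() + timeout
    streak = 0
    while time.monotonic() < deadline:
        streak = streak + 1 if check() else 0
        if streak >= needed:
            return
        time.sleep(interval)
    raise TimeoutError(f"condition not stable for {needed} consecutive checks")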
Lines changed: 29 additions & 0 deletions

@@ -1,7 +1,36 @@
 import pytest
 from huggingface_hub import HfApi
+from simple_logger.logger import get_logger
+
+LOGGER = get_logger(name=__name__)


 @pytest.fixture()
 def huggingface_api():
     return HfApi()
+
+
+@pytest.fixture()
+def num_models_from_hf_api_with_matching_criteria(request: pytest.FixtureRequest, huggingface_api: HfApi) -> int:
+    excluded_str = request.param.get("excluded_str")
+    included_str = request.param.get("included_str")
+    models = huggingface_api.list_models(author=request.param["org_name"], limit=10000)
+    model_list = []
+    for model in models:
+        if excluded_str:
+            if model.id.endswith(excluded_str):
+                LOGGER.info(f"Skipping {model.id} due to {excluded_str}")
+                continue
+            else:
+                LOGGER.info(f"Adding {model.id}")
+                model_list.append(model.id)
+        elif included_str:
+            if model.id.startswith(included_str):
+                LOGGER.info(f"Adding {model.id}")
+                model_list.append(model.id)
+            else:
+                LOGGER.info(f"Skipping {model.id} due to {included_str}")
+                continue
+        else:
+            model_list.append(model.id)
+    return len(model_list)
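Note: this fixture reads request.param, so callers drive it through pytest's indirect parametrization. A minimal usage sketch (the org name and suffix are placeholders, not taken from this commit):

# Illustrative only: parametrize the fixture indirectly.
import pytest

@pytest.mark.parametrize(
    "num_models_from_hf_api_with_matching_criteria",
    [{"org_name": "example-org", "excluded_str": "-GGUF"}],
    indirect=True,
)
def test_model_count(num_models_from_hf_api_with_matching_criteria: int) -> None:
    assert num_models_from_hf_api_with_matching_criteria >= 0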
