1919 create_llama_stack_distribution ,
2020 wait_for_llama_stack_client_ready ,
2121 vector_store_create_file_from_url ,
22+ wait_for_unique_llama_stack_pod ,
2223)
2324from utilities .constants import DscComponents , Annotations
2425from utilities .data_science_cluster_utils import update_components_in_dsc
3031
3132LOGGER = get_logger (name = __name__ )
3233
34+ distribution_name = generate_random_name (prefix = "llama-stack-distribution" )
35+
3336
3437@pytest .fixture (scope = "class" )
3538def enabled_llama_stack_operator (dsc_resource : DataScienceCluster ) -> Generator [DataScienceCluster , Any , Any ]:
@@ -87,6 +90,14 @@ def llama_stack_server_config(
8790 - fms_orchestrator_url_fixture: Fixture name to get FMS orchestrator URL from
8891 - vector_io_provider: Vector I/O provider type ("milvus" or "milvus-remote")
8992 - llama_stack_storage_size: Storage size for the deployment
93+ - embedding_model: Embedding model identifier for inference
94+ - kubeflow_llama_stack_url: LlamaStack service URL for Kubeflow
95+ - kubeflow_pipelines_endpoint: Kubeflow Pipelines API endpoint URL
96+ - kubeflow_namespace: Namespace for Kubeflow resources
97+ - kubeflow_base_image: Base container image for Kubeflow pipelines
98+ - kubeflow_results_s3_prefix: S3 prefix for storing Kubeflow results
99+ - kubeflow_s3_credentials_secret_name: Secret name for S3 credentials
100+ - kubeflow_pipelines_token: Authentication token for Kubeflow Pipelines
90101
91102 Example:
92103 @pytest.mark.parametrize("llama_stack_server_config",
@@ -136,6 +147,48 @@ def test_with_remote_milvus(llama_stack_server_config):
136147 if embedding_model :
137148 env_vars .append ({"name" : "EMBEDDING_MODEL" , "value" : embedding_model })
138149
150+ # Kubeflow-related environment variables
151+ if params .get ("enable_ragas_remote" ):
152+ # Get fixtures only when Ragas Remote/Kubeflow is enabled
153+ model_namespace = request .getfixturevalue (argname = "model_namespace" )
154+ current_client_token = request .getfixturevalue (argname = "current_client_token" )
155+ dspa_route = request .getfixturevalue (argname = "dspa_route" )
156+ dspa_s3_secret = request .getfixturevalue (argname = "dspa_s3_secret" )
157+
158+ # KUBEFLOW_LLAMA_STACK_URL: Build from LlamaStackDistribution service
159+ env_vars .append ({
160+ "name" : "KUBEFLOW_LLAMA_STACK_URL" ,
161+ "value" : f"http://{ distribution_name } -service.{ model_namespace .name } .svc.cluster.local:8321" ,
162+ })
163+
164+ # KUBEFLOW_PIPELINES_ENDPOINT: Get from DSPA route
165+ env_vars .append ({"name" : "KUBEFLOW_PIPELINES_ENDPOINT" , "value" : f"https://{ dspa_route .instance .spec .host } " })
166+
167+ # KUBEFLOW_NAMESPACE: Use model namespace
168+ env_vars .append ({"name" : "KUBEFLOW_NAMESPACE" , "value" : model_namespace .name })
169+
170+ # KUBEFLOW_BASE_IMAGE
171+ env_vars .append ({
172+ "name" : "KUBEFLOW_BASE_IMAGE" ,
173+ "value" : params .get (
174+ "kubeflow_base_image" ,
175+ "quay.io/diegosquayorg/my-ragas-provider-image"
176+ "@sha256:3749096c47f7536d6be2a7932e691abebacd578bafbe65bad2f7db475e2b93fb" ,
177+ ),
178+ })
179+
180+ # KUBEFLOW_RESULTS_S3_PREFIX: Use the kubeflow_results_s3_prefix param, defaulting to the MinIO bucket path
181+ env_vars .append ({
182+ "name" : "KUBEFLOW_RESULTS_S3_PREFIX" ,
183+ "value" : params .get ("kubeflow_results_s3_prefix" , "s3://llms/ragas-results" ),
184+ })
185+
186+ # KUBEFLOW_S3_CREDENTIALS_SECRET_NAME: Use DSPA secret name
187+ env_vars .append ({"name" : "KUBEFLOW_S3_CREDENTIALS_SECRET_NAME" , "value" : dspa_s3_secret .name })
188+
189+ # KUBEFLOW_PIPELINES_TOKEN: Get from current client token
190+ env_vars .append ({"name" : "KUBEFLOW_PIPELINES_TOKEN" , "value" : str (current_client_token )})
191+
139192 # Depending on parameter vector_io_provider, deploy vector_io provider and obtain required env_vars
140193 vector_io_provider = params .get ("vector_io_provider" ) or "milvus"
141194 env_vars_vector_io = vector_io_provider_deployment_config_factory (provider_name = vector_io_provider )
@@ -144,8 +197,8 @@ def test_with_remote_milvus(llama_stack_server_config):
144197 server_config : Dict [str , Any ] = {
145198 "containerSpec" : {
146199 "resources" : {
147- "requests" : {"cpu" : "250m " , "memory" : "500Mi " },
148- "limits" : {"cpu" : "2 " , "memory" : "12Gi " },
200+ "requests" : {"cpu" : "1 " , "memory" : "3Gi " },
201+ "limits" : {"cpu" : "3 " , "memory" : "6Gi " },
149202 },
150203 "env" : env_vars ,
151204 "name" : "llama-stack" ,
@@ -189,7 +242,6 @@ def llama_stack_distribution(
189242 llama_stack_server_config : Dict [str , Any ],
190243) -> Generator [LlamaStackDistribution , None , None ]:
191244 # The module-level distribution_name carries a random substring due to bug RHAIENG-999 / RHAIENG-1139
192- distribution_name = generate_random_name (prefix = "llama-stack-distribution" )
193245 with create_llama_stack_distribution (
194246 client = admin_client ,
195247 name = distribution_name ,
@@ -208,6 +260,7 @@ def _get_llama_stack_distribution_deployment(
208260 """
209261 Returns the Deployment resource for a given LlamaStackDistribution.
210262 Note: The deployment is created by the operator; this function retrieves it.
263+ Includes a workaround for RHAIENG-1819 to ensure exactly one pod exists.
211264
212265 Args:
213266 client (DynamicClient): Kubernetes client
@@ -222,9 +275,12 @@ def _get_llama_stack_distribution_deployment(
222275 name = llama_stack_distribution .name ,
223276 min_ready_seconds = 10 ,
224277 )
225-
278+ deployment . timeout_seconds = 120
226279 deployment .wait (timeout = 120 )
227280 deployment .wait_for_replicas ()
281+ # Workaround for RHAIENG-1819 (Incorrect number of llama-stack pods deployed after
282+ # creating LlamaStackDistribution after setting custom ca bundle in DSCI)
283+ wait_for_unique_llama_stack_pod (client = client , namespace = llama_stack_distribution .namespace )
228284 yield deployment
229285
230286
@@ -321,6 +377,7 @@ def _create_llama_stack_test_route(
321377 }
322378 }
323379 ):
380+ route .wait (timeout = 60 )
324381 yield route
325382
326383
@@ -355,11 +412,11 @@ def _create_llama_stack_client(
355412) -> Generator [LlamaStackClient , Any , Any ]:
356413 # LLS_CLIENT_VERIFY_SSL is false by default to be able to test with Self-Signed certificates
357414 verifySSL = os .getenv ("LLS_CLIENT_VERIFY_SSL" , "false" ).lower () == "true"
358- http_client = httpx .Client (verify = verifySSL )
415+ http_client = httpx .Client (verify = verifySSL , timeout = 240 )
359416 try :
360417 client = LlamaStackClient (
361418 base_url = f"https://{ route .host } " ,
362- timeout = 180.0 ,
419+ max_retries = 3 ,
363420 http_client = http_client ,
364421 )
365422 wait_for_llama_stack_client_ready (client = client )
0 commit comments