Skip to content

Gaudi: add CI #3160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ jobs:
export label_extension="-gaudi"
export docker_volume="/mnt/cache"
export docker_devices=""
export runs_on="ubuntu-latest"
export runs_on="aws-dl1-24xlarge"
export platform=""
export extra_pytest=""
export extra_pytest="--gaudi"
export target=""
esac
echo $dockerfile
Expand Down
8 changes: 6 additions & 2 deletions backends/gaudi/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ local-dev-install: install-dependencies

# In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
run-integration-tests:
uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi

run-integration-tests-with-all-models:
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi --gaudi-all-models

# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
capture-expected-outputs-for-integration-tests:
Expand Down
7 changes: 6 additions & 1 deletion backends/gaudi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,16 @@ To run the integration tests, you need to first build the image:
make -C backends/gaudi image
```

Then run the following command to run the integration tests:
Then run the following command to execute the integration tests used in CI:
```bash
make -C backends/gaudi run-integration-tests
```

To run the integration tests with all models, you can run the following command:
```bash
make -C backends/gaudi run-integration-tests-with-all-models
```

To capture the expected outputs for the integration tests, you can run the following command:
```bash
make -C backends/gaudi capture-expected-outputs-for-integration-tests
Expand Down
2 changes: 0 additions & 2 deletions backends/gaudi/server/integration-tests/pytest.ini

This file was deleted.

7 changes: 0 additions & 7 deletions backends/gaudi/server/integration-tests/requirements.txt

This file was deleted.

44 changes: 31 additions & 13 deletions integration-tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"]
pytest_plugins = [
"fixtures.neuron.service",
"fixtures.neuron.export_models",
"fixtures.gaudi.service",
]
# ruff: noqa: E402
from _pytest.fixtures import SubRequest
from huggingface_hub.inference._generated.types.chat_completion import (
Expand Down Expand Up @@ -47,7 +51,6 @@ def request(self, *args, **kwargs):
ChatComplete,
ChatCompletionChunk,
ChatCompletionComplete,
Completion,
Details,
Grammar,
InputToken,
Expand All @@ -68,6 +71,15 @@ def pytest_addoption(parser):
parser.addoption(
"--neuron", action="store_true", default=False, help="run neuron tests"
)
parser.addoption(
"--gaudi", action="store_true", default=False, help="run gaudi tests"
)
parser.addoption(
"--gaudi-all-models",
action="store_true",
default=False,
help="Run tests for all models instead of just the default subset",
)


def pytest_configure(config):
Expand All @@ -84,6 +96,22 @@ def skip_release(item):
item.add_marker(pytest.mark.skip(reason="need --release option to run"))

selectors.append(skip_release)

if config.getoption("--gaudi"):

def skip_not_gaudi(item):
if "gaudi" not in item.keywords:
item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))

selectors.append(skip_not_gaudi)
else:

def skip_gaudi(item):
if "gaudi" in item.keywords:
item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))

selectors.append(skip_gaudi)

if config.getoption("--neuron"):

def skip_not_neuron(item):
Expand All @@ -100,6 +128,7 @@ def skip_neuron(item):
item.add_marker(pytest.mark.skip(reason="requires --neuron to run"))

selectors.append(skip_neuron)

for item in items:
for selector in selectors:
selector(item)
Expand Down Expand Up @@ -131,7 +160,6 @@ def _serialize(
or isinstance(data, ChatComplete)
or isinstance(data, ChatCompletionChunk)
or isinstance(data, ChatCompletionComplete)
or isinstance(data, Completion)
or isinstance(data, OAIChatCompletionChunk)
or isinstance(data, OAICompletion)
):
Expand Down Expand Up @@ -188,8 +216,6 @@ def _convert_data(data):
if isinstance(choices, List) and len(choices) >= 1:
if "delta" in choices[0]:
return ChatCompletionChunk(**data)
if "text" in choices[0]:
return Completion(**data)
return ChatComplete(**data)
else:
return Response(**data)
Expand Down Expand Up @@ -282,9 +308,6 @@ def eq_details(details: Details, other: Details) -> bool:
)
)

def eq_completion(response: Completion, other: Completion) -> bool:
return response.choices[0].text == other.choices[0].text

def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
return (
response.choices[0].message.content == other.choices[0].message.content
Expand Down Expand Up @@ -329,11 +352,6 @@ def eq_response(response: Response, other: Response) -> bool:
if len(serialized_data) == 0:
return len(snapshot_data) == len(serialized_data)

if isinstance(serialized_data[0], Completion):
return len(snapshot_data) == len(serialized_data) and all(
[eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)]
)

if isinstance(serialized_data[0], ChatComplete):
return len(snapshot_data) == len(serialized_data) and all(
[eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,23 @@
import pytest
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound
from loguru import logger
from test_model import TEST_CONFIGS
import logging
from gaudi.test_gaudi_generate import TEST_CONFIGS
from text_generation import AsyncClient
from text_generation.types import Response
import huggingface_hub

logging.basicConfig(
level=logging.INFO,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
stream=sys.stdout,
)
logger = logging.getLogger(__file__)

# Use the latest image from the local docker build
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
HF_TOKEN = os.getenv("HF_TOKEN", None)
HF_TOKEN = huggingface_hub.get_token()

assert (
HF_TOKEN is not None
Expand All @@ -48,12 +56,6 @@
"cap_add": ["sys_nice"],
}

logger.add(
sys.stderr,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="INFO",
)


def stream_container_logs(container, test_name):
"""Stream container logs in a separate thread."""
Expand Down Expand Up @@ -151,7 +153,7 @@ def data_volume():


@pytest.fixture(scope="module")
def launcher(data_volume):
def gaudi_launcher(event_loop):
@contextlib.contextmanager
def docker_launcher(
model_id: str,
Expand Down Expand Up @@ -188,27 +190,28 @@ def get_free_port():
except Exception as e:
logger.error(f"Error handling existing container: {str(e)}")

model_name = next(
name for name, cfg in TEST_CONFIGS.items() if cfg["model_id"] == model_id
)

tgi_args = TEST_CONFIGS[model_name]["args"].copy()
tgi_args = TEST_CONFIGS[test_name]["args"].copy()

env = BASE_ENV.copy()

# Add model_id to env
env["MODEL_ID"] = model_id

# Add env config that is defined in the fixture parameter
if "env_config" in TEST_CONFIGS[model_name]:
env.update(TEST_CONFIGS[model_name]["env_config"].copy())
if "env_config" in TEST_CONFIGS[test_name]:
env.update(TEST_CONFIGS[test_name]["env_config"].copy())

volumes = [f"{DOCKER_VOLUME}:/data"]
logger.debug(f"Using volume {volumes}")

try:
logger.debug(f"Using command {tgi_args}")
logger.info(f"Creating container with name {container_name}")

logger.debug(f"Using environment {env}")
logger.debug(f"Using volumes {volumes}")
logger.debug(f"HABANA_RUN_ARGS {HABANA_RUN_ARGS}")

# Log the equivalent `docker run` command for debugging; that command itself is not executed
container = client.containers.run(
DOCKER_IMAGE,
Expand Down Expand Up @@ -271,7 +274,7 @@ def get_free_port():


@pytest.fixture(scope="module")
def generate_load():
def gaudi_generate_load():
async def generate_load_inner(
client: AsyncClient, prompt: str, max_new_tokens: int, n: int
) -> List[Response]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Dict, Any, Generator

import pytest
from test_model import TEST_CONFIGS
from test_gaudi_generate import TEST_CONFIGS

UNKNOWN_CONFIGS = {
name: config
Expand Down
Loading
Loading