Skip to content

Gaudi: add CI #3160

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,9 @@ jobs:
export label_extension="-gaudi"
export docker_volume="/mnt/cache"
export docker_devices=""
export runs_on="ubuntu-latest"
export runs_on="aws-dl1-24xlarge"
export platform=""
export extra_pytest=""
export extra_pytest="--gaudi"
export target=""
esac
echo $dockerfile
Expand Down
8 changes: 6 additions & 2 deletions backends/gaudi/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,14 @@ local-dev-install: install-dependencies

# In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
run-integration-tests:
uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi

run-integration-tests-with-all-models:
DOCKER_VOLUME=${root_dir}/data \
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
pytest --durations=0 -s -vv ${root_dir}/integration-tests --gaudi --gaudi-all-models

# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
capture-expected-outputs-for-integration-tests:
Expand Down
7 changes: 6 additions & 1 deletion backends/gaudi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,11 +104,16 @@ To run the integration tests, you need to first build the image:
make -C backends/gaudi image
```

Then run the following command to run the integration tests:
Then run the following command to execute the integration tests used in CI:
```bash
make -C backends/gaudi run-integration-tests
```

To run the integration tests with all models, you can run the following command:
```bash
make -C backends/gaudi run-integration-tests-with-all-models
```

To capture the expected outputs for the integration tests, you can run the following command:
```bash
make -C backends/gaudi capture-expected-outputs-for-integration-tests
Expand Down
2 changes: 0 additions & 2 deletions backends/gaudi/server/integration-tests/pytest.ini

This file was deleted.

7 changes: 0 additions & 7 deletions backends/gaudi/server/integration-tests/requirements.txt

This file was deleted.

44 changes: 31 additions & 13 deletions integration-tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
pytest_plugins = ["fixtures.neuron.service", "fixtures.neuron.export_models"]
pytest_plugins = [
"fixtures.neuron.service",
"fixtures.neuron.export_models",
"fixtures.gaudi.service",
]
# ruff: noqa: E402
from _pytest.fixtures import SubRequest
from huggingface_hub.inference._generated.types.chat_completion import (
Expand Down Expand Up @@ -47,7 +51,6 @@ def request(self, *args, **kwargs):
ChatComplete,
ChatCompletionChunk,
ChatCompletionComplete,
Completion,
Details,
Grammar,
InputToken,
Expand All @@ -68,6 +71,15 @@ def pytest_addoption(parser):
parser.addoption(
"--neuron", action="store_true", default=False, help="run neuron tests"
)
parser.addoption(
"--gaudi", action="store_true", default=False, help="run gaudi tests"
)
parser.addoption(
"--gaudi-all-models",
action="store_true",
default=False,
help="Run tests for all models instead of just the default subset",
)


def pytest_configure(config):
Expand All @@ -84,6 +96,22 @@ def skip_release(item):
item.add_marker(pytest.mark.skip(reason="need --release option to run"))

selectors.append(skip_release)

if config.getoption("--gaudi"):

def skip_not_gaudi(item):
if "gaudi" not in item.keywords:
item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))

selectors.append(skip_not_gaudi)
else:

def skip_gaudi(item):
if "gaudi" in item.keywords:
item.add_marker(pytest.mark.skip(reason="requires --gaudi to run"))

selectors.append(skip_gaudi)

if config.getoption("--neuron"):

def skip_not_neuron(item):
Expand All @@ -100,6 +128,7 @@ def skip_neuron(item):
item.add_marker(pytest.mark.skip(reason="requires --neuron to run"))

selectors.append(skip_neuron)

for item in items:
for selector in selectors:
selector(item)
Expand Down Expand Up @@ -131,7 +160,6 @@ def _serialize(
or isinstance(data, ChatComplete)
or isinstance(data, ChatCompletionChunk)
or isinstance(data, ChatCompletionComplete)
or isinstance(data, Completion)
or isinstance(data, OAIChatCompletionChunk)
or isinstance(data, OAICompletion)
):
Expand Down Expand Up @@ -188,8 +216,6 @@ def _convert_data(data):
if isinstance(choices, List) and len(choices) >= 1:
if "delta" in choices[0]:
return ChatCompletionChunk(**data)
if "text" in choices[0]:
return Completion(**data)
return ChatComplete(**data)
else:
return Response(**data)
Expand Down Expand Up @@ -282,9 +308,6 @@ def eq_details(details: Details, other: Details) -> bool:
)
)

def eq_completion(response: Completion, other: Completion) -> bool:
return response.choices[0].text == other.choices[0].text

def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
return (
response.choices[0].message.content == other.choices[0].message.content
Expand Down Expand Up @@ -329,11 +352,6 @@ def eq_response(response: Response, other: Response) -> bool:
if len(serialized_data) == 0:
return len(snapshot_data) == len(serialized_data)

if isinstance(serialized_data[0], Completion):
return len(snapshot_data) == len(serialized_data) and all(
[eq_completion(r, o) for r, o in zip(serialized_data, snapshot_data)]
)

if isinstance(serialized_data[0], ChatComplete):
return len(snapshot_data) == len(serialized_data) and all(
[eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,23 @@
import pytest
from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
from docker.errors import NotFound
from loguru import logger
from test_model import TEST_CONFIGS
import logging
from gaudi.test_gaudi_generate import TEST_CONFIGS
from text_generation import AsyncClient
from text_generation.types import Response
import huggingface_hub

logging.basicConfig(
level=logging.INFO,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
stream=sys.stdout,
)
logger = logging.getLogger(__file__)

# Use the latest image from the local docker build
DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", "tgi-gaudi")
DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", None)
HF_TOKEN = os.getenv("HF_TOKEN", None)
HF_TOKEN = huggingface_hub.get_token()

assert (
HF_TOKEN is not None
Expand All @@ -48,12 +56,6 @@
"cap_add": ["sys_nice"],
}

logger.add(
sys.stderr,
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
level="INFO",
)


def stream_container_logs(container, test_name):
"""Stream container logs in a separate thread."""
Expand Down Expand Up @@ -151,7 +153,7 @@ def data_volume():


@pytest.fixture(scope="module")
def launcher(data_volume):
def gaudi_launcher(event_loop):
@contextlib.contextmanager
def docker_launcher(
model_id: str,
Expand Down Expand Up @@ -188,27 +190,28 @@ def get_free_port():
except Exception as e:
logger.error(f"Error handling existing container: {str(e)}")

model_name = next(
name for name, cfg in TEST_CONFIGS.items() if cfg["model_id"] == model_id
)

tgi_args = TEST_CONFIGS[model_name]["args"].copy()
tgi_args = TEST_CONFIGS[test_name]["args"].copy()

env = BASE_ENV.copy()

# Add model_id to env
env["MODEL_ID"] = model_id

# Add env config that is defined in the fixture parameter
if "env_config" in TEST_CONFIGS[model_name]:
env.update(TEST_CONFIGS[model_name]["env_config"].copy())
if "env_config" in TEST_CONFIGS[test_name]:
env.update(TEST_CONFIGS[test_name]["env_config"].copy())

volumes = [f"{DOCKER_VOLUME}:/data"]
logger.debug(f"Using volume {volumes}")

try:
logger.debug(f"Using command {tgi_args}")
logger.info(f"Creating container with name {container_name}")

logger.debug(f"Using environment {env}")
logger.debug(f"Using volumes {volumes}")
logger.debug(f"HABANA_RUN_ARGS {HABANA_RUN_ARGS}")

# Log the equivalent `docker run` command for debugging; that command itself is not executed
container = client.containers.run(
DOCKER_IMAGE,
Expand Down Expand Up @@ -271,7 +274,7 @@ def get_free_port():


@pytest.fixture(scope="module")
def generate_load():
def gaudi_generate_load():
async def generate_load_inner(
client: AsyncClient, prompt: str, max_new_tokens: int, n: int
) -> List[Response]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import Dict, Any, Generator

import pytest
from test_model import TEST_CONFIGS
from test_gaudi_generate import TEST_CONFIGS

UNKNOWN_CONFIGS = {
name: config
Expand Down
Loading
Loading