26 changes: 8 additions & 18 deletions tests/integration_tests/Launch_Integration_Tests.sh
@@ -19,32 +19,22 @@ export HF_DATASETS_OFFLINE="1"
export TRANSFORMERS_OFFLINE="1"
export HF_DATASETS_CACHE="${HF_HOME}/datasets"

mkdir -p /checkpoints && \
ln -s /home/TestData/nemo2_ckpt/llama-3_2-1b-instruct_v2.0 /checkpoints/llama-3_2-1b-instruct_v2.0

# FIXME(martas): temporary WAR for broken deps in NeMo FW nightly image
pip install nvidia-lm-eval[math]

SCRIPT_DIR=$(dirname "$0")
PROJECT_DIR=$SCRIPT_DIR/../../
cd $PROJECT_DIR

nemo2_ckpt_path="/home/TestData/nemo2_ckpt/llama-3_2-1b-instruct_v2.0"
model_name="megatron_model"
port=8886

python /opt/Export-Deploy/scripts/deploy/nlp/deploy_ray_inframework.py \
--nemo_checkpoint $nemo2_ckpt_path \
--num_gpus 1 \
--tensor_model_parallel_size 1 \
--pipeline_model_parallel_size 1 \
--model_id $model_name \
--port $port &

deploy_pid=$!

coverage run \
--data-file=.coverage.integration_tests \
--source=src/ \
-m pytest \
-o log_cli=true \
-o log_cli_level=INFO \
-m "not pleasefixme" \
-v -s \
tests/integration_tests/nemo_fw/test_deployment.py

kill $deploy_pid
tests/integration_tests
coverage combine -q
41 changes: 0 additions & 41 deletions tests/integration_tests/nemo_fw/test_deployment.py

This file was deleted.

14 changes: 3 additions & 11 deletions tests/integration_tests/nemo_fw/test_hf_ray.py
@@ -27,11 +27,6 @@
EvaluationTarget,
)

# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
from nvidia_eval_commons.api.api_dataclasses import (
EvaluationResult as LegacyEvaluationResult,
)

logger = logging.getLogger(__name__)


@@ -59,7 +54,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/completions/",
endpoint_type="completions",
model_name=model_name,
max_retries=600,
max_retries=100,
)
assert completions_ready, (
"Completions endpoint is not ready. Please look at the deploy process log for the error"
@@ -69,7 +64,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/chat/completions",
endpoint_type="chat",
model_name=model_name,
max_retries=600,
max_retries=1, # if completions endpoint is ready, chat should be ready too
)
assert chat_ready, (
"Chat endpoint is not ready. Please look at the deploy process log for the error"
@@ -128,8 +123,5 @@ def test_evaluation(eval_type, endpoint_type, eval_params, tmp_path):
type=eval_type, params=ConfigParams(**eval_params), output_dir=str(tmp_path)
)
results = evaluate(target_cfg=eval_target, eval_cfg=eval_config)
# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
assert isinstance(results, EvaluationResult) or isinstance(
results, LegacyEvaluationResult
)
assert isinstance(results, EvaluationResult)
logger.info("Evaluation completed.")
17 changes: 6 additions & 11 deletions tests/integration_tests/nemo_fw/test_nemo2_ray.py
@@ -27,11 +27,6 @@
EvaluationTarget,
)

# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
from nvidia_eval_commons.api.api_dataclasses import (
EvaluationResult as LegacyEvaluationResult,
)

logger = logging.getLogger(__name__)


@@ -65,7 +60,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/completions/",
endpoint_type="completions",
model_name=model_name,
max_retries=600,
max_retries=100,
)
assert completions_ready, (
"Completions endpoint is not ready. Please look at the deploy process log for the error"
@@ -75,7 +70,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/chat/completions",
endpoint_type="chat",
model_name=model_name,
max_retries=600,
max_retries=1, # if completions endpoint is ready, chat should be ready too
)
assert chat_ready, (
"Chat endpoint is not ready. Please look at the deploy process log for the error"
@@ -95,6 +90,9 @@ def deployment_process():
subprocess.run(["pkill", f"-{signal.SIGTERM}", "tritonserver"], check=False)


# FIXME(martas): Errors out due to an MCore bug on deployment side
# enable once fixed in Export-Deploy
@pytest.mark.pleasefixme
@pytest.mark.run_only_on("GPU")
@pytest.mark.parametrize(
"eval_type,endpoint_type,eval_params",
@@ -133,8 +131,5 @@ def test_evaluation(eval_type, endpoint_type, eval_params, tmp_path):
type=eval_type, params=ConfigParams(**eval_params), output_dir=str(tmp_path)
)
results = evaluate(target_cfg=eval_target, eval_cfg=eval_config)
# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
assert isinstance(results, EvaluationResult) or isinstance(
results, LegacyEvaluationResult
)
assert isinstance(results, EvaluationResult)
logger.info("Evaluation completed.")
21 changes: 8 additions & 13 deletions tests/integration_tests/nemo_fw/test_nemo2_triton.py
@@ -27,11 +27,6 @@
EvaluationTarget,
)

# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
from nvidia_eval_commons.api.api_dataclasses import (
EvaluationResult as LegacyEvaluationResult,
)

logger = logging.getLogger(__name__)


@@ -52,9 +47,9 @@ def deployment_process():
"1",
"--num_nodes",
"1",
"--tensor_parallelism_size",
"--tensor_model_parallel_size",
"1",
"--pipeline_parallelism_size",
"--pipeline_model_parallel_size",
"1",
"--triton_model_name",
model_name,
@@ -69,7 +64,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/completions/",
endpoint_type="completions",
model_name=model_name,
max_retries=600,
max_retries=100,
)
assert completions_ready, (
"Completions endpoint is not ready. Please look at the deploy process log for the error"
@@ -79,7 +74,7 @@ def deployment_process():
endpoint_url=f"http://0.0.0.0:{port}/v1/chat/completions",
endpoint_type="chat",
model_name=model_name,
max_retries=600,
max_retries=1, # if completions endpoint is ready, chat should be ready too
)
assert chat_ready, (
"Chat endpoint is not ready. Please look at the deploy process log for the error"
@@ -99,6 +94,9 @@ def deployment_process():
subprocess.run(["pkill", f"-{signal.SIGTERM}", "tritonserver"], check=False)


# FIXME(martas): Errors out due to an MCore bug on deployment side
# enable once fixed in Export-Deploy
@pytest.mark.pleasefixme
@pytest.mark.run_only_on("GPU")
@pytest.mark.parametrize(
"eval_type,endpoint_type,eval_params",
@@ -137,8 +135,5 @@ def test_evaluation(eval_type, endpoint_type, eval_params, tmp_path):
type=eval_type, params=ConfigParams(**eval_params), output_dir=str(tmp_path)
)
results = evaluate(target_cfg=eval_target, eval_cfg=eval_config)
# FIXME(martas): EF packages pre 25.09 use old imports from nvidia_eval_commons
assert isinstance(results, EvaluationResult) or isinstance(
results, LegacyEvaluationResult
)
assert isinstance(results, EvaluationResult)
logger.info("Evaluation completed.")
3 changes: 3 additions & 0 deletions tests/integration_tests/nemo_fw/test_notebooks.py
@@ -32,6 +32,9 @@ def uninstall_nvidia_simple_evals():
subprocess.run(["pip", "uninstall", "-y", "nvidia-simple-evals"])


# FIXME(martas): Errors out due to an MCore bug on deployment side
# enable once fixed in Export-Deploy
@pytest.mark.pleasefixme
@pytest.mark.parametrize(
"notebook_path",
[