Skip to content

Commit 91997ff

Browse files
Fix CI failures and simplify/stabilize the test suite
- Add per-test timeouts via pytest-timeout, keyed by test dir (unit 60s, gpu/example/regression 300s), call-phase only; per-test overrides where a one-time CUDA JIT compile is expected.
- Pre-compile the conv3d implicit-GEMM CUDA kernel in a dedicated _extensions test so the build cost doesn't land on the first kernel test.
- Relocate tests to the lane that owns their dependency: rename the gpt-oss example test dir to match the example, move the vLLM sparse-attention plugin test to gpu_vllm (drop its importorskip), and guard the diffusers import in the partial-install unit lane.
- Fix the eagle example override (data.sample_size, not training.sample_size).
- Consolidate diffusers test model-path constants into a single SDXL_PATH and use tiny pipelines for the plumbing tests (incl. cache_diffusion); normalize local_id basenames.
- llm_sparsity hf_pts.py: reuse get_dataset_dataloader, honor --model_max_length, and left-pad for calibration.
- Skip the upstream-broken nemotron-sft-agentic-v2 gated smoke dataset.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
1 parent d4f9a2c commit 91997ff

22 files changed

Lines changed: 132 additions & 52 deletions

File tree

.github/workflows/example_tests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535
strategy:
3636
fail-fast: false
3737
matrix:
38-
example: [gpt_oss, llm_distill, llm_qat, llm_sparsity, diffusers_sparsity, specdec_bench]
38+
example: [diffusers_sparsity, gpt-oss, llm_distill, llm_qat, llm_sparsity, specdec_bench]
3939
include:
4040
- example: speculative_decoding
4141
docker_image: "26.01"
@@ -104,7 +104,7 @@ jobs:
104104
with:
105105
docker_image: "nvcr.io/nvidia/tensorrt:26.05-py3"
106106
example: ${{ matrix.example }}
107-
timeout_minutes: 30
107+
timeout_minutes: 45
108108
pip_install_extras: "[onnx,hf,dev-test]"
109109
runner: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && 'linux-amd64-gpu-rtxpro6000-latest-1' || 'linux-amd64-gpu-rtxpro6000-latest-2' }}
110110

.github/workflows/gpu_tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ jobs:
3939
matrix:
4040
include:
4141
- example: gpu
42-
timeout: 75
42+
timeout: 60
4343
container_image: nvcr.io/nvidia/pytorch:26.05-py3
4444
- example: gpu_megatron
4545
timeout: 60

.github/workflows/unit_tests.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
linux:
5959
needs: [check-dco]
6060
runs-on: ubuntu-latest
61-
timeout-minutes: 30
61+
timeout-minutes: 15
6262
steps:
6363
- uses: actions/checkout@v6
6464
- uses: ./.github/actions/ubuntu-setup
@@ -78,7 +78,7 @@ jobs:
7878
if: needs.check-file-changes.outputs.any_changed == 'true'
7979
needs: [linux, check-file-changes]
8080
runs-on: windows-latest
81-
timeout-minutes: 30
81+
timeout-minutes: 15
8282
steps:
8383
- uses: actions/checkout@v6
8484
- uses: actions/setup-python@v6
@@ -90,7 +90,7 @@ jobs:
9090
if: needs.check-file-changes.outputs.any_changed == 'true'
9191
needs: [linux, check-file-changes]
9292
runs-on: ubuntu-latest
93-
timeout-minutes: 30
93+
timeout-minutes: 15
9494
strategy:
9595
fail-fast: false
9696
matrix:
@@ -115,7 +115,7 @@ jobs:
115115
if: needs.check-file-changes.outputs.any_changed == 'true'
116116
needs: [linux, check-file-changes]
117117
runs-on: ubuntu-latest
118-
timeout-minutes: 30
118+
timeout-minutes: 15
119119
strategy:
120120
fail-fast: false
121121
matrix:

examples/llm_sparsity/weight_sparsity/hf_pts.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ def get_tokenizer(ckpt_path: str, model_max_length: int, trust_remote_code: bool
5757
tokenizer = AutoTokenizer.from_pretrained(
5858
ckpt_path,
5959
model_max_length=model_max_length,
60-
padding_side="right",
60+
# Left padding is recommended for calibration (get_dataset_dataloader warns otherwise).
61+
padding_side="left",
6162
use_fast=False,
6263
trust_remote_code=trust_remote_code,
6364
)
@@ -109,7 +110,7 @@ def main(args):
109110
tokenizer=tokenizer,
110111
batch_size=args.batch_size,
111112
num_samples=args.calib_size,
112-
max_sample_length=512,
113+
max_sample_length=args.model_max_length,
113114
device=args.device,
114115
)
115116

@@ -146,8 +147,9 @@ def main(args):
146147
parser.add_argument("--dtype", help="Model data type.", default="fp16")
147148
parser.add_argument(
148149
"--model_max_length",
150+
type=int,
149151
default=2048,
150-
help="Maximum sequence length. Sequences will be right padded (and possibly truncated).",
152+
help="Maximum sequence length used for both the tokenizer and calibration sequences.",
151153
)
152154
parser.add_argument("--batch_size", help="Batch size for calibration.", type=int, default=1)
153155
parser.add_argument(

examples/torch_onnx/torch_quant_to_onnx.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ def main():
510510
"--trt_builder_optimization_level",
511511
type=int,
512512
default=4,
513+
choices=range(6),
513514
help="trtexec --builderOptimizationLevel (0-5). Lower is much faster to build "
514515
"(useful for tests that only verify the engine builds); higher tunes harder.",
515516
)

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ skips = [
306306
# print execution time for 50 slowest tests and generate coverage reports
307307
addopts = "-v -ra --instafail --cov-report=term-missing --cov-report=html --cov-report=xml:coverage.xml --cov-config=pyproject.toml --durations=50 --strict-markers"
308308
pythonpath = ["tests/"]
309+
# Apply per-test timeouts (see tests/conftest.py) to the test call only, not fixture setup/teardown
310+
timeout_func_only = true
309311
markers = [
310312
"integration: Tests that require external services or other non-hermetic dependencies",
311313
"manual: Only run when --run-manual is given",

tests/_test_utils/examples/models.py

Lines changed: 13 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -48,38 +48,28 @@ def _select_path(remote_id: str, local_id: str) -> str:
4848
local_id="TinyLlama-1.1B-Chat-v1.0",
4949
)
5050

51-
SXDL_PATH = _select_path(
52-
remote_id="stabilityai/stable-diffusion-xl-base-1.0",
53-
local_id="stable-diffusion-xl-base-1.0",
54-
)
55-
56-
PIXART_PATH = _select_path(
57-
remote_id="PixArt-alpha/PixArt-XL-2-1024-MS",
58-
local_id="PixArt-XL-2-1024-MS",
59-
)
60-
61-
LLAVA_PATH = _select_path(
62-
remote_id="llava-hf/llava-1.5-7b-hf",
63-
local_id="llava-1.5-7b-hf",
64-
)
65-
6651
QWEN_VL_PATH = _select_path(
6752
remote_id="Qwen/Qwen3-VL-2B-Instruct",
6853
local_id="Qwen3-VL-2B-Instruct",
6954
)
7055

7156
# Diffusers
72-
FLUX_SCHNELL_PATH = _select_path(
73-
remote_id="hf-internal-testing/tiny-flux-pipe",
74-
local_id="black-forest-labs/FLUX.1-schnell",
75-
)
76-
77-
SDXL_1_0_PATH = _select_path(
57+
SDXL_PATH = _select_path(
7858
remote_id="hf-internal-testing/tiny-sdxl-pipe",
79-
local_id="stabilityai/stable-diffusion-xl-base-1.0",
59+
local_id="stable-diffusion-xl-base-1.0",
8060
)
8161

8262
SD3_PATH = _select_path(
8363
remote_id="hf-internal-testing/tiny-sd3-pipe",
84-
local_id="stabilityai/stable-diffusion-3-medium-diffusers",
64+
local_id="stable-diffusion-3-medium-diffusers",
65+
)
66+
67+
FLUX_SCHNELL_PATH = _select_path(
68+
remote_id="hf-internal-testing/tiny-flux-pipe",
69+
local_id="FLUX.1-schnell",
70+
)
71+
72+
PIXART_PATH = _select_path(
73+
remote_id="PixArt-alpha/PixArt-XL-2-1024-MS",
74+
local_id="PixArt-XL-2-1024-MS",
8575
)

tests/conftest.py

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,22 @@ def pytest_addoption(parser):
4444
)
4545

4646

47+
# Default per-test `call` wall-clock cap (seconds) by top-level tests/ subdirectory
48+
# Every collectible test group must be listed here; otherwise collection fails with a UsageError
49+
# A test can override its cap by adding ``@pytest.mark.timeout(...)``
50+
_DEFAULT_TIMEOUT = {
51+
"examples": 300,
52+
"gpu": 120,
53+
"gpu_megatron": 120,
54+
"gpu_trtllm": 60,
55+
"gpu_vllm": 60,
56+
"regression": 180,
57+
"unit": 60,
58+
}
59+
60+
4761
def pytest_collection_modifyitems(config, items):
48-
"""Skip tests with specific markers unless their corresponding flag is provided."""
62+
"""Skip flag-gated tests and apply a default per-test timeout based on the test directory."""
4963
skip_marks = [
5064
("manual", "--run-manual"),
5165
("release", "--run-release"),
@@ -58,6 +72,23 @@ def pytest_collection_modifyitems(config, items):
5872
if mark_name in item.keywords:
5973
item.add_marker(skipper)
6074

75+
tests_root = Path(__file__).parent
76+
for item in items:
77+
if item.get_closest_marker("timeout") is not None or not item.path.is_relative_to(
78+
tests_root
79+
):
80+
continue
81+
# First path component under tests/ is the group dir (unit, gpu, examples, ...).
82+
# Crash loudly (rather than silently skip) if a group has no configured default, so a
83+
# newly added tests/<group>/ must be given an explicit timeout in the mapping above.
84+
group = item.path.relative_to(tests_root).parts[0]
85+
if group not in _DEFAULT_TIMEOUT:
86+
raise pytest.UsageError(
87+
f"tests/{group}/ has no default timeout; add '{group}' to "
88+
"_DEFAULT_TIMEOUT in tests/conftest.py."
89+
)
90+
item.add_marker(pytest.mark.timeout(_DEFAULT_TIMEOUT[group]))
91+
6192

6293
@pytest.fixture
6394
def skip_on_windows():

tests/examples/diffusers/test_cache_diffusion.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import pytest
2020
import torch
21-
from _test_utils.examples.models import PIXART_PATH, SXDL_PATH
21+
from _test_utils.examples.models import PIXART_PATH, SDXL_PATH
2222
from _test_utils.examples.run_command import MODELOPT_ROOT
2323
from diffusers import DiffusionPipeline, PixArtAlphaPipeline
2424

@@ -29,9 +29,8 @@
2929

3030
def test_sdxl_cachify():
3131
pipe = DiffusionPipeline.from_pretrained(
32-
SXDL_PATH,
32+
SDXL_PATH,
3333
torch_dtype=torch.float16,
34-
variant="fp16",
3534
use_safetensors=True,
3635
).to("cuda")
3736
cachify.prepare(pipe, SDXL_DEFAULT_CONFIG)

tests/examples/diffusers/test_diffusers.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from typing import NamedTuple
1818

1919
import pytest
20-
from _test_utils.examples.models import FLUX_SCHNELL_PATH, SD3_PATH, SDXL_1_0_PATH
20+
from _test_utils.examples.models import FLUX_SCHNELL_PATH, SD3_PATH, SDXL_PATH
2121
from _test_utils.examples.run_command import run_example_command
2222
from _test_utils.torch.misc import minimum_sm
2323

@@ -120,7 +120,7 @@ def inference(self, tmp_path: Path) -> None:
120120
pytest.param(
121121
DiffuserModel(
122122
name="sdxl-1.0",
123-
path=SDXL_1_0_PATH,
123+
path=SDXL_PATH,
124124
dtype="Half",
125125
format_type="fp8",
126126
quant_algo="max",
@@ -130,7 +130,7 @@ def inference(self, tmp_path: Path) -> None:
130130
),
131131
DiffuserModel(
132132
name="sdxl-1.0",
133-
path=SDXL_1_0_PATH,
133+
path=SDXL_PATH,
134134
dtype="Half",
135135
format_type="int8",
136136
quant_algo="smoothquant",
@@ -273,8 +273,8 @@ def test_wan22_quantization(wan_model: Wan22Model, tiny_wan22_path: str, tmp_pat
273273
("flux-schnell", FLUX_SCHNELL_PATH, True),
274274
("sd3-medium", SD3_PATH, False),
275275
("sd3-medium", SD3_PATH, True),
276-
("sdxl-1.0", SDXL_1_0_PATH, False),
277-
("sdxl-1.0", SDXL_1_0_PATH, True),
276+
("sdxl-1.0", SDXL_PATH, False),
277+
("sdxl-1.0", SDXL_PATH, True),
278278
],
279279
ids=[
280280
"flux_schnell_torch",

0 commit comments

Comments
 (0)