-
Notifications
You must be signed in to change notification settings - Fork 288
[CI] Add inference performance regression tests #1140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
ac439e1
094c69f
85f11fe
e9ab196
05615f3
208cf83
9f8bf13
da54fc3
7862b4d
c5bad5d
d1ebf92
2ed3234
7724172
a9d1a36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,228 @@ | ||||||
| # SPDX-License-Identifier: Apache-2.0 | ||||||
| import json | ||||||
| import os | ||||||
| import time | ||||||
| from datetime import datetime, timezone | ||||||
|
|
||||||
| import torch | ||||||
| import pytest | ||||||
|
|
||||||
| from fastvideo import VideoGenerator | ||||||
| from fastvideo.logger import init_logger | ||||||
| from fastvideo.worker.multiproc_executor import MultiprocExecutor | ||||||
|
|
||||||
| logger = init_logger(__name__) | ||||||
|
|
||||||
| REQUIRED_GPUS = 2 | ||||||
|
||||||
|
|
||||||
| NUM_WARMUP_RUNS = 1 | ||||||
| NUM_MEASUREMENT_RUNS = 3 | ||||||
|
|
||||||
| WAN_T2V_PARAMS = { | ||||||
| "num_gpus": | ||||||
| 2, | ||||||
| "model_path": | ||||||
| "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", | ||||||
| "height": | ||||||
| 480, | ||||||
| "width": | ||||||
| 832, | ||||||
| "num_frames": | ||||||
| 45, | ||||||
| "num_inference_steps": | ||||||
| 4, | ||||||
| "guidance_scale": | ||||||
| 3, | ||||||
| "embedded_cfg_scale": | ||||||
| 6, | ||||||
| "flow_shift": | ||||||
| 7.0, | ||||||
| "seed": | ||||||
| 1024, | ||||||
| "sp_size": | ||||||
| 2, | ||||||
| "tp_size": | ||||||
| 1, | ||||||
| "vae_sp": | ||||||
| True, | ||||||
| "fps": | ||||||
| 24, | ||||||
| "neg_prompt": | ||||||
| "Bright tones, overexposed, static, blurred details, subtitles, " | ||||||
| "style, works, paintings, images, static, overall gray, worst quality, " | ||||||
| "low quality, JPEG compression residue, ugly, incomplete, extra fingers, " | ||||||
| "poorly drawn hands, poorly drawn faces, deformed, disfigured, " | ||||||
| "misshapen limbs, fused fingers, still picture, messy background, " | ||||||
| "three legs, many people in the background, walking backwards", | ||||||
| "text-encoder-precision": ("fp32", ), | ||||||
| } | ||||||
|
|
||||||
| TEST_PROMPT = ( | ||||||
| "Will Smith casually eats noodles, his relaxed demeanor contrasting " | ||||||
| "with the energetic background of a bustling street food market. " | ||||||
| "The scene captures a mix of humor and authenticity. " | ||||||
| "Mid-shot framing, vibrant lighting.") | ||||||
|
|
||||||
| # Device-aware thresholds: {gpu_name_substring: {metric: value}} | ||||||
| # Initial values are generous placeholders — calibrate after first CI run. | ||||||
| DEVICE_THRESHOLDS = { | ||||||
| "L40S": { | ||||||
| "max_generation_time_s": 60.0, | ||||||
| "max_peak_memory_mb": 20000.0, | ||||||
| }, | ||||||
| } | ||||||
|
|
||||||
| # Fallback for unknown GPUs (very generous so test still runs) | ||||||
| DEFAULT_THRESHOLDS = { | ||||||
| "max_generation_time_s": 120.0, | ||||||
| "max_peak_memory_mb": 30000.0, | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
def _get_thresholds() -> dict:
    """Return the perf thresholds matching the current GPU.

    Matches by substring of the CUDA device name against
    ``DEVICE_THRESHOLDS``; falls back to the generous
    ``DEFAULT_THRESHOLDS`` for unrecognized hardware.
    """
    device_name = torch.cuda.get_device_name()
    matched_key = next(
        (key for key in DEVICE_THRESHOLDS if key in device_name), None)
    if matched_key is not None:
        thresholds = DEVICE_THRESHOLDS[matched_key]
        logger.info("Using thresholds for %s: %s", matched_key, thresholds)
        return thresholds
    logger.warning("No thresholds for device '%s', using defaults", device_name)
    return DEFAULT_THRESHOLDS
|
|
||||||
|
|
||||||
| def _shutdown_executor(generator): | ||||||
| if generator is None: | ||||||
| return | ||||||
| if isinstance(generator.executor, MultiprocExecutor): | ||||||
| generator.executor.shutdown() | ||||||
|
|
||||||
|
|
||||||
def _run_generation(generator, prompt, generation_kwargs):
    """Run a single generation and return (elapsed_seconds, peak_memory_mb)."""
    num_devices = torch.cuda.device_count()

    # Clear the per-device peak-memory counters so the readings below
    # reflect only this generation.
    for device_id in range(num_devices):
        torch.cuda.reset_peak_memory_stats(device_id)

    # Synchronize on both sides of the call so the wall-clock interval
    # covers all outstanding GPU work, not just kernel launches.
    torch.cuda.synchronize()
    started_at = time.perf_counter()
    generator.generate_video(prompt, **generation_kwargs)
    torch.cuda.synchronize()
    elapsed = time.perf_counter() - started_at

    # Report the single highest peak across all visible devices, in MB.
    per_device_peaks = [
        torch.cuda.max_memory_allocated(device_id)
        for device_id in range(num_devices)
    ]
    peak_memory_mb = max(per_device_peaks) / (1024 * 1024)

    return elapsed, peak_memory_mb
|
|
||||||
|
|
||||||
| def _write_results(results: dict) -> None: | ||||||
| """Write JSON results to the results directory.""" | ||||||
| script_dir = os.path.dirname(os.path.abspath(__file__)) | ||||||
| results_dir = os.path.join(script_dir, "results") | ||||||
| os.makedirs(results_dir, exist_ok=True) | ||||||
|
|
||||||
| timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") | ||||||
|
||||||
| timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") | |
| timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-").replace(".", "-") # ISO 8601 compatible and filename-safe |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The command for the "Performance Tests" step includes `timeout 30m`. A similar timeout (`timeout=1800`) is also specified in the `run_performance_tests` function within `fastvideo/tests/modal/pr_test.py`. It's generally better to have a single source of truth for timeouts to avoid confusion and potential conflicts. Consider removing one of these timeouts or clarifying their intended roles (e.g., Buildkite timeout as a failsafe, Modal timeout as the primary control).