#!/bin/bash
# Source: FastVideo/examples/inference/cli/v1_inference_longcat_vc.sh at 64f822034426ce5422e60e788d436c9441139968 (hao-ai-lab/FastVideo on GitHub)

# LongCat Video Continuation (VC) Inference Script
#
# This script runs LongCat VC inference using the fastvideo CLI.
# LongCat VC takes an input video and generates a continuation of it.
#
# Usage:
#   bash examples/inference/cli/v1_inference_longcat_vc.sh
#
# Prerequisites:
#   - Install fastvideo: pip install -e .
#   - The model weights will be auto-downloaded from HuggingFace
#   - Or use local weights if you have them

# ---------------------------------------------------------------------------
# Configuration
#
# Every setting keeps its original default, but NUM_GPUS, MODEL_BASE and
# VIDEO_PATH may be overridden from the caller's environment instead of
# editing this file, e.g.:
#   VIDEO_PATH=/data/clip.mp4 bash examples/inference/cli/v1_inference_longcat_vc.sh
# An unset or empty override falls back to the default.
# ---------------------------------------------------------------------------

# GPU count; this script passes it to both --sp-size and --num-gpus.
num_gpus="${NUM_GPUS:-1}"

# Deliberately exported as an empty string (unchanged from the original).
# NOTE(review): presumably an empty value leaves attention-backend selection
# to fastvideo's default -- confirm against fastvideo's env-var handling.
export FASTVIDEO_ATTENTION_BACKEND=

# Model path options:
#   Option 1 (default): HuggingFace model ID (auto-downloaded)
#   Option 2: local weights, e.g. MODEL_BASE=weights/longcat-vc-upload
export MODEL_BASE="${MODEL_BASE:-FastVideo/LongCat-Video-VC-Diffusers}"

# Input video path (the video to be continued).
VIDEO_PATH="${VIDEO_PATH:-assets/motorcycle.mp4}"

# Fail fast if the input video is not a regular file, before invoking the
# fastvideo CLI. Diagnostics go to stderr so they are not mixed into stdout
# when the script's output is piped or captured; exit status is 1.
if [[ ! -f "$VIDEO_PATH" ]]; then
    printf 'Error: Video not found at %s\n' "$VIDEO_PATH" >&2
    printf 'Please provide a valid video path\n' >&2
    exit 1
fi

# Run LongCat VC generation through the fastvideo CLI.
#
# The arguments are collected in a Bash array so that every expansion stays a
# single word (a MODEL_BASE or VIDEO_PATH containing spaces is no longer split
# into several arguments) and so related flags can be grouped and commented.
# This is the last command, so the script exits with fastvideo's status.
fastvideo_args=(
    # Model and parallelism: --sp-size and --num-gpus both follow num_gpus.
    --model-path "$MODEL_BASE"
    --sp-size "$num_gpus"
    --tp-size 1
    --num-gpus "$num_gpus"

    # Offload / memory toggles, passed through to the CLI unchanged.
    --dit-cpu-offload False
    --vae-cpu-offload True
    --text-encoder-cpu-offload True
    --pin-cpu-memory False
    --enable-bsa False

    # Conditioning input.
    # NOTE(review): --num-cond-frames presumably sets how many frames of the
    # input video are used as conditioning -- confirm in the fastvideo docs.
    --video-path "$VIDEO_PATH"
    --num-cond-frames 13

    # Output geometry, length and sampling settings.
    --height 480
    --width 832
    --num-frames 93
    --num-inference-steps 50
    --fps 15
    --guidance-scale 4.0

    # Text conditioning.
    --prompt "A person rides a motorcycle along a long, straight road that stretches between a body of water and a forested hillside. The rider steadily accelerates, keeping the motorcycle centered between the guardrails, while the scenery passes by on both sides. The video captures the journey from the rider's perspective, emphasizing the sense of motion and adventure."
    --negative-prompt "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards"

    # Fixed seed and output location.
    --seed 42
    --output-path outputs_video/longcat_vc
)

fastvideo generate "${fastvideo_args[@]}"