montage-ai/requirements.txt at main · mfahsold/montage-ai · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Video Processing
moviepy>=2.2.1,<2.3  # Align with pyproject; 2.2.x requires pillow<12
opencv-python-headless>=4.12.0.88
soundfile>=0.12.0
# mediapipe is declared as an optional dependency in pyproject.toml; it is left out here to keep the base requirements clean.
Pillow>=10.0.0,<12.0  # moviepy 2.2.x constraint
tqdm>=4.66.0
scenedetect[opencv]>=0.6.7.1
requests>=2.32.0
numpy>=2.0.0  # opencv-python-headless 4.12+ requires numpy>=2
scipy>=1.10.0  # For mathematical path optimization (Smart Reframing)

# Data Validation
pydantic>=2.0.0

# Color Matching (Shot-to-Shot consistency)
color-matcher>=0.5.0

# HTTP / API
# (requests already listed above)

# LLM API (cgpu/Gemini integration)
openai>=1.55.0

# System Monitoring
psutil>=6.0.0

# Kubernetes API (cluster mode)
kubernetes>=29.0.0

# Timeline Export (Professional NLE Integration)
opentimelineio>=0.18.1  # Academy Software Foundation standard
jsonschema>=4.25.1  # JSON schema validation for Creative Director

# Web UI
Flask>=3.0.0
Werkzeug>=3.0.0

# Testing
pytest>=8.0.0
pytest-flask>=1.3.0

# YAML parsing / job queue (not test-specific)
PyYAML>=6.0
redis>=5.0.0,<7.0.0  # redis 7.x has TCP_KEEPIDLE issues on some kernels
rq>=1.16.0

# Audio Analysis
# librosa/numba omitted to avoid numpy pinning; audio_analysis is FFmpeg-first.

# OPTIMIZATION Phase 3: msgpack for 22x faster cache serialization
msgpack>=1.0.0

# =============================================================================
# OPTIONAL SOTA DEPENDENCIES (2024-2025 Research)
# =============================================================================
# These provide state-of-the-art performance but are not required.
# montage-ai falls back to fast alternatives when not installed.

# SCENE DETECTION (Priority: AutoShot > TransNetV2 > PySceneDetect)
# ------------------------------------------------------------------
# AutoShot: +4.2% F1 over TransNetV2, NAS-optimized 3D ConvNet + Transformer
#   pip install autoshot  # or clone from https://github.com/wentaozhu/AutoShot
#
# TransNetV2: Neural network scene detection, 250fps on GPU
#   pip install torch transnetv2

# BEAT DETECTION (Priority: madmom > FFmpeg)
# ------------------------------------------------------------------
# madmom: SOTA RNN-based beat tracking; beat timestamps are ~0.02-0.06s more accurate than librosa's
#   pip install madmom
# Note: madmom requires Cython; you may need to run `pip install cython` first

# VLM CLIP SELECTION (Query-based intelligent clip selection)
# ------------------------------------------------------------------
# Qwen2.5-VL: Best for long videos (1h+ support), temporal grounding
#   pip install transformers accelerate
#   # Model: Qwen/Qwen2.5-VL-7B-Instruct (previous generation: Qwen/Qwen2-VL-7B-Instruct)
#
# VILA/NVILA: NVIDIA VLM, good for Jetson deployment
#   pip install vila  # or clone from https://github.com/NVlabs/VILA
#
# InternVideo2: 60+ video tasks, needle-in-haystack search
#   pip install internvideo2  # or clone from https://github.com/OpenGVLab/InternVideo

# GPU VIDEO STABILIZATION (Priority: CuVista > vidstab)
# ------------------------------------------------------------------
# CuVista: CUDA/OpenCL/AVX512 accelerated, 5-10x faster than CPU vidstab
#   Install from: https://github.com/RainerMtb/cuvista
#   Requires: CUDA toolkit or OpenCL runtime

# GPU AUDIO ANALYSIS (Optional spectral analysis acceleration)
# ------------------------------------------------------------------
# PyTorch or CuPy for GPU-accelerated FFT
#   pip install torch  # CUDA/MPS/ROCm support
#   # or
#   pip install cupy-cuda12x  # CUDA-only, faster for pure FFT