|
11 | 11 | """ |
12 | 12 |
|
13 | 13 | import base64 |
| 14 | +import math |
| 15 | +import os |
| 16 | +import subprocess |
14 | 17 | import sys |
| 18 | +import tempfile |
15 | 19 | from typing import Dict, Any, List, Optional, Tuple |
16 | 20 |
|
17 | 21 | import numpy as np |
@@ -116,6 +120,119 @@ def runInference(self, frames: List[Dict[str, Any]]) -> Dict[str, Any]: |
116 | 120 | log.error(f"MapAnything inference (frames) failed: {e}") |
117 | 121 | raise RuntimeError(f"MapAnything inference (frames) failed: {e}") |
118 | 122 |
|
| 123 | + def _maxFramesForTimeBudget( |
| 124 | + self, |
| 125 | + time_budget_seconds: float, |
| 126 | + overhead: float, |
| 127 | + ) -> int: |
| 128 | + |
| 129 | + cpu_sec_per_frame = float(os.getenv("MAPANYTHING_CPU_SEC_PER_FRAME", "10")) |
| 130 | + cuda_sec_per_frame = float(os.getenv("MAPANYTHING_CUDA_SEC_PER_FRAME", "0.8")) |
| 131 | + sec_per_frame = cpu_sec_per_frame |
| 132 | + if self.device.startswith("cuda") and cuda_sec_per_frame: |
| 133 | + sec_per_frame = cuda_sec_per_frame |
| 134 | + |
| 135 | + usable = max(0.0, time_budget_seconds - overhead) |
| 136 | + if usable <= 0: |
| 137 | + return 0 |
| 138 | + |
| 139 | + # conservative: floor |
| 140 | + max_frames = int(math.floor(usable / max(1e-6, sec_per_frame))) |
| 141 | + return max_frames |
| 142 | + |
| 143 | + # Put in ReconstructionModel base class |
| 144 | + def _framesFromVideoAsBase64Dicts( |
| 145 | + self, |
| 146 | + video_path: str, |
| 147 | + max_frames: int, |
| 148 | + use_keyframes: bool = True, |
| 149 | + sample_every_n: int = 10, |
| 150 | + jpeg_quality: int = 85, |
| 151 | + max_side: Optional[int] = 960, |
| 152 | + ) -> List[Dict[str, Any]]: |
| 153 | + """ |
| 154 | + Extract frames using ffmpeg and return: |
| 155 | + [{"data": "<base64-encoded-jpeg>"}, ...] |
| 156 | +
|
| 157 | + Modes: |
| 158 | + - use_keyframes=True: extract TRUE keyframes (I-frames) |
| 159 | + - use_keyframes=False: sample every N frames using select filter |
| 160 | + """ |
| 161 | + if max_frames < 1: |
| 162 | + return [] |
| 163 | + |
| 164 | + if not os.path.isfile(video_path): |
| 165 | + raise ValueError(f"Video file not found: {video_path}") |
| 166 | + |
| 167 | + if sample_every_n < 1: |
| 168 | + sample_every_n = 1 |
| 169 | + |
| 170 | + # Map jpeg_quality (1..100) -> ffmpeg mjpeg qscale (2..31), where 2 is best quality |
| 171 | + qscale = int(round(31 - (np.clip(jpeg_quality, 1, 100) / 100.0) * 29)) |
| 172 | + qscale = int(np.clip(qscale, 2, 31)) |
| 173 | + |
| 174 | + vf_parts: List[str] = [] |
| 175 | + |
| 176 | + # If not keyframes, use select filter to sample frames |
| 177 | + if not use_keyframes: |
| 178 | + # keep frames where n % sample_every_n == 0 |
| 179 | + vf_parts.append(f"select='not(mod(n\\,{sample_every_n}))'") |
| 180 | + else: |
| 181 | + log.info("Using key frames") |
| 182 | + |
| 183 | + # Optional downscale: keep aspect ratio, cap longest side |
| 184 | + if max_side is not None and max_side > 0: |
| 185 | + vf_parts.append( |
| 186 | + f"scale='if(gte(iw,ih),min(iw,{max_side}),-2)':'if(lt(iw,ih),min(ih,{max_side}),-2)'" |
| 187 | + ) |
| 188 | + |
| 189 | + vf = ",".join(vf_parts) if vf_parts else None |
| 190 | + |
| 191 | + frames: List[Dict[str, Any]] = [] |
| 192 | + |
| 193 | + with tempfile.TemporaryDirectory(prefix="frames_") as tmpdir: |
| 194 | + out_pattern = os.path.join(tmpdir, "frame_%06d.jpg") |
| 195 | + |
| 196 | + cmd = [ |
| 197 | + "ffmpeg", |
| 198 | + "-hide_banner", |
| 199 | + "-loglevel", "error", |
| 200 | + ] |
| 201 | + |
| 202 | + # Keyframes mode: only decode keyframes |
| 203 | + if use_keyframes: |
| 204 | + cmd += ["-skip_frame", "nokey"] |
| 205 | + |
| 206 | + cmd += ["-i", video_path] |
| 207 | + |
| 208 | + if vf: |
| 209 | + cmd += ["-vf", vf] |
| 210 | + |
| 211 | + cmd += [ |
| 212 | + "-vsync", "vfr", |
| 213 | + "-frames:v", str(max_frames), |
| 214 | + "-q:v", str(qscale), |
| 215 | + out_pattern, |
| 216 | + ] |
| 217 | + |
| 218 | + try: |
| 219 | + subprocess.run(cmd, check=True) |
| 220 | + except FileNotFoundError: |
| 221 | + raise RuntimeError("ffmpeg not found. Install ffmpeg in the container/host.") |
| 222 | + except subprocess.CalledProcessError as e: |
| 223 | + mode = "keyframes" if use_keyframes else f"sample_every_n={sample_every_n}" |
| 224 | + raise RuntimeError(f"ffmpeg failed extracting frames ({mode}): {e}") |
| 225 | + |
| 226 | + # Read extracted frames back into base64 |
| 227 | + for i in range(1, max_frames + 1): |
| 228 | + fpath = os.path.join(tmpdir, f"frame_{i:06d}.jpg") |
| 229 | + if not os.path.exists(fpath): |
| 230 | + break |
| 231 | + with open(fpath, "rb") as f: |
| 232 | + frames.append({"data": base64.b64encode(f.read()).decode("utf-8")}) |
| 233 | + |
| 234 | + return frames |
| 235 | + |
def getSupportedOutputs(self) -> List[str]:
    """Return the list of supported output format names."""
    supported_formats: List[str] = ["mesh", "pointcloud"]
    return supported_formats
|
0 commit comments