Skip to content

Commit 3d56f17

Browse files
authored
Merge pull request #135 from amosproj/93-reduce-inference-time
93: reduce inference time.

Signed-off-by: Felix Hilgers <felix.hilgers@fau.de>
2 parents 6a4c28a + 8a2ab0a commit 3d56f17

3 files changed

Lines changed: 86 additions & 15 deletions

File tree

src/backend/analyzer/routes.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
# SPDX-License-Identifier: MIT
44
import asyncio
55
import json
6+
import cv2
67
import logging
78

89
import numpy as np
@@ -37,6 +38,15 @@ def __init__(self) -> None:
3738
self._processing_task: asyncio.Task[None] | None = None
3839
self._intrinsics_logged: bool = False
3940

41+
self.max_consecutive_errors = 5
42+
# adaptive downscaling parameters
43+
self.target_scale_init = config.TARGET_SCALE_INIT
44+
self.smooth_factor = config.SMOOTH_FACTOR
45+
self.min_scale = config.MIN_SCALE
46+
self.max_scale = config.MAX_SCALE
47+
# adaptive frame dropping parameters
48+
self.fps_threshold = config.FPS_THRESHOLD
49+
4050
async def connect(self, websocket: WebSocket) -> None:
4151
"""Accept a new WebSocket connection."""
4252
await websocket.accept()
@@ -107,9 +117,10 @@ async def _process_frames(self, source_track: MediaStreamTrack) -> None:
107117
frame_id = 0
108118
last_fps_time = asyncio.get_event_loop().time()
109119
fps_counter = 0
110-
current_fps = 0.0
120+
current_fps = 30.0
111121
consecutive_errors = 0
112-
max_consecutive_errors = 5
122+
123+
target_scale = self.target_scale_init
113124

114125
try:
115126
while self.active_connections:
@@ -121,7 +132,8 @@ async def _process_frames(self, source_track: MediaStreamTrack) -> None:
121132
except asyncio.TimeoutError:
122133
logging.warning("Frame receive timeout, skipping...")
123134
consecutive_errors += 1
124-
if consecutive_errors >= max_consecutive_errors:
135+
136+
if consecutive_errors >= self.max_consecutive_errors:
125137
logging.error(
126138
"Too many consecutive timeouts, reconnecting..."
127139
)
@@ -133,7 +145,7 @@ async def _process_frames(self, source_track: MediaStreamTrack) -> None:
133145
)
134146
consecutive_errors += 1
135147

136-
if consecutive_errors >= max_consecutive_errors:
148+
if consecutive_errors >= self.max_consecutive_errors:
137149
# full reconnect
138150
if self._webcam_session is not None:
139151
await self._webcam_session.close()
@@ -170,21 +182,45 @@ async def _process_frames(self, source_track: MediaStreamTrack) -> None:
170182
fps_counter = 0
171183
last_fps_time = current_time
172184

185+
if current_fps < 10:
186+
target_scale -= self.smooth_factor
187+
elif current_fps < 18:
188+
target_scale -= self.smooth_factor * 0.5
189+
else:
190+
target_scale += self.smooth_factor * 0.8
191+
192+
target_scale = max(
193+
self.min_scale, min(self.max_scale, target_scale)
194+
)
195+
print(
196+
f"[Adaptive Res] Scale={target_scale:.2f} | FPS={current_fps:.1f}"
197+
)
198+
199+
# Resize frame for processing
200+
if target_scale < 0.98:
201+
new_w = int(frame_array.shape[1] * target_scale)
202+
new_h = int(frame_array.shape[0] * target_scale)
203+
frame_small = cv2.resize(frame_array, (new_w, new_h))
204+
else:
205+
frame_small = frame_array
206+
207+
sample_rate = 2 if current_fps < self.fps_threshold else 4
208+
173209
# Run ML inference every 3rd frame and collect detections
174-
if not self.active_connections or frame_id % 3 != 0:
210+
if not self.active_connections or frame_id % sample_rate != 0:
175211
continue
176212

177213
# YOLO detection
178-
detections = await detector.infer(frame_array)
214+
detections = await detector.infer(frame_small)
179215

180216
if not detections:
181217
continue
182218

183219
# Distance estimation
184-
distances = estimator.estimate_distance_m(frame_array, detections)
220+
distances = estimator.estimate_distance_m(frame_small, detections)
185221

186222
metadata = self._build_metadata_message(
187-
frame_rgb=frame_array,
223+
frame_rgb=frame_small,
188224
detections=detections,
189225
distances=distances,
190226
timestamp=current_time,
@@ -196,7 +232,7 @@ async def _process_frames(self, source_track: MediaStreamTrack) -> None:
196232
await self._send_metadata(metadata)
197233

198234
# Small delay to prevent overwhelming
199-
await asyncio.sleep(0.033) # ~30 FPS processing
235+
# await asyncio.sleep(0.033) # ~30 FPS processing
200236

201237
except Exception as e:
202238
logging.warning(f"Frame processing error: {e}")

src/backend/common/config.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,31 @@ class Config:
1212
# Camera settings
1313
CAMERA_INDEX: int = int(os.getenv("CAMERA_INDEX", "0"))
1414

15+
REGION_SIZE = int(
16+
os.getenv("REGION_SIZE", "5")
17+
) # size of square region for depth median
18+
SCALE_FACTOR = float(
19+
os.getenv("SCALE_FACTOR", "432.0")
20+
) # empirical calibration factor
21+
UPDATE_FREQ = int(
22+
os.getenv("UPDATE_FREQ", "2")
23+
) # number of frames between depth updates
24+
25+
# adaptive downsampling settings
26+
TARGET_SCALE_INIT: float = float(
27+
os.getenv("TARGET_SCALE_INIT", "0.8")
28+
) # initial downscale factor for images
29+
SMOOTH_FACTOR: float = float(
30+
os.getenv("SMOOTH_FACTOR", "0.15")
31+
) # smoothing factor for scale updates
32+
MIN_SCALE: float = float(os.getenv("MIN_SCALE", "0.2")) # minimum allowed scale
33+
MAX_SCALE: float = float(os.getenv("MAX_SCALE", "1.0")) # maximum allowed scale
34+
35+
# adaptive frame dropping
36+
FPS_THRESHOLD: float = float(
37+
os.getenv("FPS_THRESHOLD", "15.0")
38+
) # threshold FPS for skipping more frames
39+
1540
# Depth estimation settings
1641
REGION_SIZE = int(os.getenv("REGION_SIZE", "5"))
1742
SCALE_FACTOR = float(os.getenv("SCALE_FACTOR", "432.0"))

src/backend/common/core/depth.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from typing import Callable, Literal, Optional
1010

1111
from common.config import config
12-
from common.core.contracts import DepthEstimator, Detection
12+
from common.core.contracts import DepthEstimator
1313

1414
# Factories let us swap depth estimation backends without changing call sites.
1515
DepthEstimatorFactory = Callable[[Optional[Path]], DepthEstimator]
@@ -49,10 +49,14 @@ def __init__(
4949
midas_cache_directory: Custom directory for PyTorch Hub cache.
5050
If None, uses PyTorch's default cache location.
5151
"""
52-
53-
self.region_size = config.REGION_SIZE # size of region around bbox center
52+
self.region_size = (
53+
config.REGION_SIZE
54+
) # size of region around bbox center to sample depth
5455
self.scale_factor = config.SCALE_FACTOR # empirical calibration factor
56+
self.update_freq = config.UPDATE_FREQ # frames between depth updates
5557

58+
self.update_id = -1
59+
self.last_depths: list[float] = []
5660
self.model_type = model_type
5761
self.midas_model = midas_model
5862
self.device = (
@@ -67,19 +71,24 @@ def __init__(
6771
.to(self.device)
6872
.eval()
6973
)
70-
# MiDaS transforms
74+
# get MiDaS transforms
7175
midas_transforms = torch.hub.load(midas_model, "transforms", trust_repo=True)
7276
if model_type in {"DPT_Large", "DPT_Hybrid"}:
7377
self.transform = midas_transforms.dpt_transform
7478
else:
7579
self.transform = midas_transforms.small_transform
7680

7781
def estimate_distance_m(
78-
self, frame_rgb: np.ndarray, detections: list[Detection]
82+
self, frame_rgb: np.ndarray, dets: list[tuple[int, int, int, int, int, float]]
7983
) -> list[float]:
8084
"""Estimate distance in meters for each detection based on depth map.
8185
8286
Returns list of distances in meters."""
87+
self.update_id += 1
88+
if self.update_id % self.update_freq != 0 and len(self.last_depths) == len(
89+
dets
90+
):
91+
return self.last_depths
8392
h, w, _ = frame_rgb.shape
8493

8594
input_batch = self.transform(frame_rgb).to(self.device)
@@ -93,7 +102,7 @@ def estimate_distance_m(
93102
).squeeze()
94103
depth_map = prediction.cpu().numpy()
95104
distances = []
96-
for x1, y1, x2, y2, _cls_id, _conf in detections:
105+
for x1, y1, x2, y2, _cls_id, _conf in dets:
97106
# extract 5x5 central region of bbox and clip to image bounds
98107
cx = int((x1 + x2) / 2)
99108
cy = int((y1 + y2) / 2)
@@ -109,6 +118,7 @@ def estimate_distance_m(
109118
depth_value = max(np.mean(region), 1e-6) # avoid div by zero
110119

111120
distances.append(float(self.scale_factor / depth_value))
121+
self.last_depths = distances
112122
return distances
113123

114124

0 commit comments

Comments (0)