Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ src/backend/models/*
src/backend/models/*.pt
src/backend/models/*.onnx
src/backend/models/midas_cache/
src/backend/checkpoints/
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,11 @@ download-depth-anything:
cd src/backend && uv run python ../../scripts/download_models.py \
--models depth-anything \
--output-dir $(MODELS_DIR)

# Download Apple's ML Depth Pro weights. Runs `uv sync --extra inference`
# first because the depth_pro package itself is an inference-only extra.
download-depth-pro:
	@echo "Downloading Depth Pro model..."
	@mkdir -p $(MODELS_DIR)
	cd src/backend && uv sync --extra inference
	cd src/backend && uv run python ../../scripts/download_models.py \
		--models depth-pro \
		--output-dir $(MODELS_DIR)
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ make download-models-onnx
make download-yolo
make download-midas
make download-depth-anything
make download-depth-pro

# Export models to ONNX
make export-yolo-onnx
Expand All @@ -89,11 +90,18 @@ To start the analyzer service with ONNX backend:
DETECTOR_BACKEND=onnx DEPTH_BACKEND=onnx make run-analyzer-local
```


To start the analyzer service with Depth Anything V2 backend:
```bash
DEPTH_BACKEND=depth_anything_v2 make run-analyzer-local
```

To start the analyzer service with Apple's ML Depth Pro backend:
```bash
DEPTH_BACKEND=depth_pro make run-analyzer-local
```
*Note: The Depth Pro model weights are approx. 1.8 GB and will be downloaded automatically on first use (or ahead of time via `make download-depth-pro`).*

Example production usage with custom model type:
```bash
# Set model type via environment variable
Expand Down
13 changes: 12 additions & 1 deletion scripts/download_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
export_yolo_to_onnx,
DEFAULT_MIDAS_MODEL,
DEFAULT_MIDAS_REPO,
ensure_depth_pro_model_available,
)
except ImportError as e:
logger.error("Failed to import backend modules: %s", e)
Expand Down Expand Up @@ -120,8 +121,9 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--models",
type=str,

default="yolo,midas",
help="Comma-separated list of models to process (yolo, midas, depth-anything)",
help="Comma-separated list of models to process (yolo, midas, depth-anything, depth-pro)",
)

return parser.parse_args()
Expand Down Expand Up @@ -213,10 +215,19 @@ def main() -> None:

ensure_depth_anything_model_available(
model_name=da_model,

cache_dir=da_cache
)


# --- Depth Pro Processing ---
if "depth-pro" in models_to_process:
logger.info("\n--- Processing Depth Pro ---")

dp_cache = config.DEPTH_PRO_CACHE_DIR
ensure_depth_pro_model_available(cache_dir=dp_cache)


logger.info("\n--- Done ---")
logger.info("Models available at: %s", output_dir)
if midas_cache_final:
Expand Down
3 changes: 2 additions & 1 deletion src/backend/Dockerfile.analyzer
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ WORKDIR /app

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# Install system dependencies for opencv
# Install system dependencies: OpenCV runtime libraries, plus git (required to install depth_pro from its GitHub source)
RUN apt-get update && apt-get install -y \
libgl1 \
libglib2.0-0 \
git \
&& rm -rf /var/lib/apt/lists/*

COPY pyproject.toml uv.lock ./
Expand Down
4 changes: 4 additions & 0 deletions src/backend/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ class Config:
DEPTH_ANYTHING_CACHE_DIR: Path = Path(
os.getenv("DEPTH_ANYTHING_CACHE_DIR", "models/depth_anything_cache")
).resolve()
# Model identifier passed through to the depth_pro backend.
DEPTH_PRO_MODEL: str = os.getenv("DEPTH_PRO_MODEL", "depth_pro")
# Local cache directory for the downloaded Depth Pro checkpoint (~1.8 GB).
DEPTH_PRO_CACHE_DIR: Path = Path(
    os.getenv("DEPTH_PRO_CACHE_DIR", "models/depth_pro_cache")
).resolve()
MIDAS_ONNX_MODEL_PATH: Path = Path(
os.getenv("MIDAS_ONNX_MODEL_PATH", "models/midas_small.onnx")
).resolve()
Expand Down
8 changes: 8 additions & 0 deletions src/backend/common/core/depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,17 @@ def _predict_depth_map(
# Built-in backends: always available.
register_depth_backend("torch", MiDasDepthEstimator)
register_depth_backend("onnx", OnnxMiDasDepthEstimator)


# Optional backends: registration is best-effort — if the estimator's extra
# dependencies are not installed, the import fails and the backend is simply
# left unregistered instead of breaking this module's import.
try:
    from common.core.depth_anything import DepthAnythingV2Estimator

    register_depth_backend("depth_anything_v2", DepthAnythingV2Estimator)
except ImportError:
    pass

try:
    from common.core.depth_pro import DepthProEstimator

    register_depth_backend("depth_pro", DepthProEstimator)
except ImportError as e:
    logger.debug(f"Could not register 'depth_pro' backend: {e}")
115 changes: 115 additions & 0 deletions src/backend/common/core/depth_pro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: 2025 robot-visual-perception
#
# SPDX-License-Identifier: MIT
from __future__ import annotations

import logging
from pathlib import Path
from typing import Optional

import numpy as np
import torch
from PIL import Image

from common.config import config
from common.core.contracts import DepthEstimator, Detection
from common.core.depth_utils import resize_to_frame

import depth_pro # type: ignore

logger = logging.getLogger(__name__)


class DepthProEstimator(DepthEstimator):
    """Monocular depth estimator backed by Apple's ML Depth Pro.

    Implements the ``DepthEstimator`` contract: given an RGB frame and a
    list of detections, produce a per-detection distance in meters. A full
    depth-map inference is only run every ``update_freq`` calls; cached
    distances are returned in between.
    """

    def __init__(
        self,
        cache_directory: Optional[Path] = None,
        model_name: str = config.DEPTH_PRO_MODEL,
    ) -> None:
        # depth_pro is imported strictly at module level, so reaching this
        # constructor implies the dependency is installed.
        self.region_size = config.REGION_SIZE
        self.scale_factor = config.SCALE_FACTOR
        self.update_freq = config.UPDATE_FREQ

        # Frame counter and result cache used to throttle inference.
        self.update_id = -1
        self.last_depths: list[float] = []

        self.cache_directory = cache_directory or config.DEPTH_PRO_CACHE_DIR
        self.model_name = model_name

        logger.info("Loading Depth Pro model...")

        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        try:
            self.model, self.transform = depth_pro.create_model_and_transforms(
                device=self.device, aspect_ratio=1.0
            )
            self.model.eval()
        except Exception as e:
            logger.error(f"Failed to initialize Depth Pro: {e}")
            raise

    def estimate_distance_m(
        self, frame_rgb: np.ndarray, dets: list[Detection]
    ) -> list[float]:
        """Estimate distance in meters for each detection based on depth map."""
        self.update_id += 1
        cache_matches = len(self.last_depths) == len(dets)
        if self.update_id % self.update_freq != 0 and cache_matches:
            # Skip the expensive inference step and reuse the last result.
            return self.last_depths

        height, width, _ = frame_rgb.shape
        depth_map = self._predict_depth_map(frame_rgb, (height, width))

        self.last_depths = self._distances_from_depth_map(depth_map, dets)
        return self.last_depths

    def _predict_depth_map(
        self, frame_rgb: np.ndarray, output_shape: tuple[int, int]
    ) -> np.ndarray:
        """Run Depth Pro on a frame and resize the depth map to output_shape."""
        image_tensor = self.transform(Image.fromarray(frame_rgb))

        # f_px=None: no focal length is supplied for the frame.
        with torch.no_grad():
            prediction = self.model.infer(image_tensor, f_px=None)

        depth = prediction["depth"]
        if isinstance(depth, torch.Tensor):
            depth = depth.cpu().numpy()

        return resize_to_frame(depth, output_shape)

    def _distances_from_depth_map(
        self,
        depth_map: np.ndarray,
        dets: list[Detection],
    ) -> list[float]:
        """Median depth inside each detection box; 0.0 for degenerate boxes."""
        height, width = depth_map.shape[:2]
        distances: list[float] = []

        for det in dets:
            # Clamp the box to the depth map bounds.
            left = max(0, int(det.x1))
            top = max(0, int(det.y1))
            right = min(width, int(det.x2))
            bottom = min(height, int(det.y2))

            if right <= left or bottom <= top:
                # Box collapsed to nothing after clamping.
                distances.append(0.0)
            else:
                region = depth_map[top:bottom, left:right]
                # Median is robust against background pixels inside the box.
                distances.append(float(np.median(region)))

        return distances
96 changes: 96 additions & 0 deletions src/backend/common/core/model_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
AutoImageProcessor = None # type: ignore
AutoModelForDepthEstimation = None # type: ignore

try:
import depth_pro # type: ignore
except ImportError:
depth_pro = None

logger = logging.getLogger(__name__)

# Constants
Expand Down Expand Up @@ -290,3 +295,94 @@ def ensure_depth_anything_model_available(
error_msg = f"Failed to load Depth Anything model {model_name}: {e}"
logger.error(error_msg)
raise RuntimeError(error_msg) from e


def ensure_depth_pro_model_available(
    cache_dir: Optional[Path] = None,
) -> Path:
    """Ensure the Depth Pro checkpoint is downloaded and loadable.

    Downloads Apple's official checkpoint into ``cache_dir`` (if not already
    present), mirrors it at ``./checkpoints/depth_pro.pt`` — the location the
    ``depth_pro`` package resolves its checkpoint from by default — and then
    instantiates the model once to verify the weights actually load.

    Args:
        cache_dir: Directory to cache the downloaded checkpoint. Defaults to
            torch hub's standard checkpoint directory.

    Returns:
        The cache directory containing the checkpoint.

    Raises:
        ImportError: If the ``depth_pro`` package is not installed.
        RuntimeError: If downloading or loading the model fails.
    """
    if cache_dir is None:
        # Fall back to torch hub's conventional checkpoint location.
        cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"

    if depth_pro is None:
        raise ImportError(
            "depth_pro is not available. Please install it via `uv sync --extra inference`"
        )

    try:
        cache_dir = Path(str(cache_dir)).resolve()
        cache_dir.mkdir(parents=True, exist_ok=True)

        checkpoint_path = cache_dir / "depth_pro.pt"
        url = "https://ml-site.cdn-apple.com/models/depth-pro/depth_pro.pt"

        if not checkpoint_path.exists():
            logger.info("Downloading Depth Pro weights to %s...", checkpoint_path)
            torch.hub.download_url_to_file(url, str(checkpoint_path), progress=True)
        else:
            logger.info("Depth Pro weights found at %s", checkpoint_path)

        # depth_pro resolves its checkpoint as `checkpoints/depth_pro.pt`
        # relative to the current working directory, so expose the cached
        # file there: symlink when possible, copy as a fallback.
        cwd_checkpoints = Path.cwd() / "checkpoints"
        cwd_checkpoints.mkdir(exist_ok=True)
        cwd_target = cwd_checkpoints / "depth_pro.pt"

        # A dangling symlink reports exists() == False yet still blocks
        # symlink_to() with FileExistsError — remove it before re-linking.
        if cwd_target.is_symlink() and not cwd_target.exists():
            cwd_target.unlink()

        if not cwd_target.exists():
            try:
                cwd_target.symlink_to(checkpoint_path)
                logger.info("Symlinked checkpoint to %s", cwd_target)
            except OSError:
                # Fallback to copy if symlink fails (e.g. Windows without privs)
                shutil.copy2(checkpoint_path, cwd_target)
                logger.info("Copied checkpoint to %s", cwd_target)

        # Instantiate once so any corrupt/incomplete download surfaces here
        # rather than at service startup.
        depth_pro.create_model_and_transforms()

        logger.info("Depth Pro model is ready.")
        return cache_dir
    except Exception as e:
        logger.error(f"Failed to load Depth Pro model: {e}")
        raise RuntimeError(f"Depth Pro initialization failed: {e}") from e
1 change: 1 addition & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ inference = [
"ultralytics==8.3.58",
"timm==1.0.22",
"transformers==4.49.0", # for Depth Anything V2
"depth_pro @ git+https://github.com/apple/ml-depth-pro.git",
]

onnx-tools = [
Expand Down
Loading
Loading