Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@ src/backend/models/*
src/backend/models/*.pt
src/backend/models/*.onnx
src/backend/models/midas_cache/
src/backend/checkpoints/
8 changes: 8 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -255,3 +255,11 @@ download-depth-anything:
cd src/backend && uv run python ../../scripts/download_models.py \
--models depth-anything \
--output-dir $(MODELS_DIR)

# Download Apple's ML Depth Pro weights. Runs `uv sync --extra inference`
# first because the depth_pro package itself is an inference-only extra.
download-depth-pro:
	@echo "Downloading Depth Pro model..."
	@mkdir -p $(MODELS_DIR)
	cd src/backend && uv sync --extra inference
	cd src/backend && uv run python ../../scripts/download_models.py \
		--models depth-pro \
		--output-dir $(MODELS_DIR)
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ make download-models-onnx
make download-yolo
make download-midas
make download-depth-anything
make download-depth-pro

# Export models to ONNX
make export-yolo-onnx
Expand All @@ -89,11 +90,18 @@ To start the analyzer service with ONNX backend:
DETECTOR_BACKEND=onnx DEPTH_BACKEND=onnx make run-analyzer-local
```


To start the analyzer service with Depth Anything V2 backend:
```bash
DEPTH_BACKEND=depth_anything_v2 make run-analyzer-local
```

To start the analyzer service with Apple's ML Depth Pro backend:
```bash
DEPTH_BACKEND=depth_pro make run-analyzer-local
```
*Note: The Depth Pro model weights are approx. 1.8 GB and will be downloaded automatically on first use (or ahead of time via `make download-depth-pro`).*

Example production usage with custom model type:
```bash
# Set model type via environment variable
Expand Down
13 changes: 12 additions & 1 deletion scripts/download_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
export_yolo_to_onnx,
DEFAULT_MIDAS_MODEL,
DEFAULT_MIDAS_REPO,
ensure_depth_pro_model_available,
)
except ImportError as e:
logger.error("Failed to import backend modules: %s", e)
Expand Down Expand Up @@ -120,8 +121,9 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--models",
type=str,

default="yolo,midas",
help="Comma-separated list of models to process (yolo, midas, depth-anything)",
help="Comma-separated list of models to process (yolo, midas, depth-anything, depth-pro)",
)

return parser.parse_args()
Expand Down Expand Up @@ -213,10 +215,19 @@ def main() -> None:

ensure_depth_anything_model_available(
model_name=da_model,

cache_dir=da_cache
)


# --- Depth Pro Processing ---
if "depth-pro" in models_to_process:
logger.info("\n--- Processing Depth Pro ---")

dp_cache = config.DEPTH_PRO_CACHE_DIR
ensure_depth_pro_model_available(cache_dir=dp_cache)


logger.info("\n--- Done ---")
logger.info("Models available at: %s", output_dir)
if midas_cache_final:
Expand Down
3 changes: 2 additions & 1 deletion src/backend/Dockerfile.analyzer
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ WORKDIR /app

COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

# Install system dependencies for opencv
# Install system dependencies: OpenCV runtime libraries, plus git (required to install depth_pro from its GitHub source)
RUN apt-get update && apt-get install -y \
libgl1 \
libglib2.0-0 \
git \
&& rm -rf /var/lib/apt/lists/*

COPY pyproject.toml uv.lock ./
Expand Down
4 changes: 4 additions & 0 deletions src/backend/common/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ class Config:
DEPTH_ANYTHING_CACHE_DIR: Path = Path(
os.getenv("DEPTH_ANYTHING_CACHE_DIR", "models/depth_anything_cache")
).resolve()
# Model identifier passed through to the depth_pro backend.
DEPTH_PRO_MODEL: str = os.getenv("DEPTH_PRO_MODEL", "depth_pro")
# Local cache directory for the downloaded Depth Pro checkpoint (~1.8 GB).
DEPTH_PRO_CACHE_DIR: Path = Path(
    os.getenv("DEPTH_PRO_CACHE_DIR", "models/depth_pro_cache")
).resolve()
MIDAS_ONNX_MODEL_PATH: Path = Path(
os.getenv("MIDAS_ONNX_MODEL_PATH", "models/midas_small.onnx")
).resolve()
Expand Down
8 changes: 8 additions & 0 deletions src/backend/common/core/depth.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,17 @@ def _predict_depth_map(
# Built-in backends: always available.
register_depth_backend("torch", MiDasDepthEstimator)
register_depth_backend("onnx", OnnxMiDasDepthEstimator)


# Optional backends: registration is best-effort — if the estimator's extra
# dependencies are not installed, the import fails and the backend is simply
# left unregistered instead of breaking this module's import.
try:
    from common.core.depth_anything import DepthAnythingV2Estimator

    register_depth_backend("depth_anything_v2", DepthAnythingV2Estimator)
except ImportError:
    pass

try:
    from common.core.depth_pro import DepthProEstimator

    register_depth_backend("depth_pro", DepthProEstimator)
except ImportError as e:
    logger.debug(f"Could not register 'depth_pro' backend: {e}")
115 changes: 115 additions & 0 deletions src/backend/common/core/depth_pro.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SPDX-FileCopyrightText: 2025 robot-visual-perception
#
# SPDX-License-Identifier: MIT
from __future__ import annotations

import logging
from pathlib import Path
from typing import Optional

import numpy as np
import torch
from PIL import Image

from common.config import config
from common.core.contracts import DepthEstimator, Detection
from common.core.depth_utils import resize_to_frame

import depth_pro # type: ignore

logger = logging.getLogger(__name__)


class DepthProEstimator(DepthEstimator):
    """Monocular depth estimator backed by Apple's ML Depth Pro.

    Implements the ``DepthEstimator`` contract: given an RGB frame and a
    list of detections, produce a per-detection distance in meters. A full
    depth-map inference is only run every ``update_freq`` calls; cached
    distances are returned in between.
    """

    def __init__(
        self,
        cache_directory: Optional[Path] = None,
        model_name: str = config.DEPTH_PRO_MODEL,
    ) -> None:
        # depth_pro is imported strictly at module level, so reaching this
        # constructor implies the dependency is installed.
        self.region_size = config.REGION_SIZE
        self.scale_factor = config.SCALE_FACTOR
        self.update_freq = config.UPDATE_FREQ

        # Frame counter and result cache used to throttle inference.
        self.update_id = -1
        self.last_depths: list[float] = []

        self.cache_directory = cache_directory or config.DEPTH_PRO_CACHE_DIR
        self.model_name = model_name

        logger.info("Loading Depth Pro model...")

        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        try:
            self.model, self.transform = depth_pro.create_model_and_transforms(
                device=self.device, aspect_ratio=1.0
            )
            self.model.eval()
        except Exception as e:
            logger.error(f"Failed to initialize Depth Pro: {e}")
            raise

    def estimate_distance_m(
        self, frame_rgb: np.ndarray, dets: list[Detection]
    ) -> list[float]:
        """Estimate distance in meters for each detection based on depth map."""
        self.update_id += 1
        cache_matches = len(self.last_depths) == len(dets)
        if self.update_id % self.update_freq != 0 and cache_matches:
            # Skip the expensive inference step and reuse the last result.
            return self.last_depths

        height, width, _ = frame_rgb.shape
        depth_map = self._predict_depth_map(frame_rgb, (height, width))

        self.last_depths = self._distances_from_depth_map(depth_map, dets)
        return self.last_depths

    def _predict_depth_map(
        self, frame_rgb: np.ndarray, output_shape: tuple[int, int]
    ) -> np.ndarray:
        """Run Depth Pro on a frame and resize the depth map to output_shape."""
        image_tensor = self.transform(Image.fromarray(frame_rgb))

        # f_px=None: no focal length is supplied for the frame.
        with torch.no_grad():
            prediction = self.model.infer(image_tensor, f_px=None)

        depth = prediction["depth"]
        if isinstance(depth, torch.Tensor):
            depth = depth.cpu().numpy()

        return resize_to_frame(depth, output_shape)

    def _distances_from_depth_map(
        self,
        depth_map: np.ndarray,
        dets: list[Detection],
    ) -> list[float]:
        """Median depth inside each detection box; 0.0 for degenerate boxes."""
        height, width = depth_map.shape[:2]
        distances: list[float] = []

        for det in dets:
            # Clamp the box to the depth map bounds.
            left = max(0, int(det.x1))
            top = max(0, int(det.y1))
            right = min(width, int(det.x2))
            bottom = min(height, int(det.y2))

            if right <= left or bottom <= top:
                # Box collapsed to nothing after clamping.
                distances.append(0.0)
            else:
                region = depth_map[top:bottom, left:right]
                # Median is robust against background pixels inside the box.
                distances.append(float(np.median(region)))

        return distances
96 changes: 96 additions & 0 deletions src/backend/common/core/model_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@
AutoImageProcessor = None # type: ignore
AutoModelForDepthEstimation = None # type: ignore

try:
import depth_pro # type: ignore
except ImportError:
depth_pro = None

logger = logging.getLogger(__name__)

# Constants
Expand Down Expand Up @@ -290,3 +295,94 @@ def ensure_depth_anything_model_available(
error_msg = f"Failed to load Depth Anything model {model_name}: {e}"
logger.error(error_msg)
raise RuntimeError(error_msg) from e


def ensure_depth_pro_model_available(
    cache_dir: Optional[Path] = None,
) -> Path:
    """Ensure the Depth Pro checkpoint is downloaded and loadable.

    Downloads Apple's official checkpoint into ``cache_dir`` (if not already
    present), mirrors it at ``./checkpoints/depth_pro.pt`` — the location the
    ``depth_pro`` package resolves its checkpoint from by default — and then
    instantiates the model once to verify the weights actually load.

    Args:
        cache_dir: Directory to cache the downloaded checkpoint. Defaults to
            torch hub's standard checkpoint directory.

    Returns:
        The cache directory containing the checkpoint.

    Raises:
        ImportError: If the ``depth_pro`` package is not installed.
        RuntimeError: If downloading or loading the model fails.
    """
    if cache_dir is None:
        # Fall back to torch hub's conventional checkpoint location.
        cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"

    if depth_pro is None:
        raise ImportError(
            "depth_pro is not available. Please install it via `uv sync --extra inference`"
        )

    try:
        cache_dir = Path(str(cache_dir)).resolve()
        cache_dir.mkdir(parents=True, exist_ok=True)

        checkpoint_path = cache_dir / "depth_pro.pt"
        url = "https://ml-site.cdn-apple.com/models/depth-pro/depth_pro.pt"

        if not checkpoint_path.exists():
            logger.info("Downloading Depth Pro weights to %s...", checkpoint_path)
            torch.hub.download_url_to_file(url, str(checkpoint_path), progress=True)
        else:
            logger.info("Depth Pro weights found at %s", checkpoint_path)

        # depth_pro resolves its checkpoint as `checkpoints/depth_pro.pt`
        # relative to the current working directory, so expose the cached
        # file there: symlink when possible, copy as a fallback.
        cwd_checkpoints = Path.cwd() / "checkpoints"
        cwd_checkpoints.mkdir(exist_ok=True)
        cwd_target = cwd_checkpoints / "depth_pro.pt"

        # A dangling symlink reports exists() == False yet still blocks
        # symlink_to() with FileExistsError — remove it before re-linking.
        if cwd_target.is_symlink() and not cwd_target.exists():
            cwd_target.unlink()

        if not cwd_target.exists():
            try:
                cwd_target.symlink_to(checkpoint_path)
                logger.info("Symlinked checkpoint to %s", cwd_target)
            except OSError:
                # Fallback to copy if symlink fails (e.g. Windows without privs)
                shutil.copy2(checkpoint_path, cwd_target)
                logger.info("Copied checkpoint to %s", cwd_target)

        # Instantiate once so any corrupt/incomplete download surfaces here
        # rather than at service startup.
        depth_pro.create_model_and_transforms()

        logger.info("Depth Pro model is ready.")
        return cache_dir
    except Exception as e:
        logger.error(f"Failed to load Depth Pro model: {e}")
        raise RuntimeError(f"Depth Pro initialization failed: {e}") from e
1 change: 1 addition & 0 deletions src/backend/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ inference = [
"ultralytics==8.3.58",
"timm==1.0.22",
"transformers==4.49.0", # for Depth Anything V2
"depth_pro @ git+https://github.com/apple/ml-depth-pro.git",
]

onnx-tools = [
Expand Down
Loading
Loading