roboflow
diff --git a/‎README.md‎
Lines changed: 18 additions & 2 deletions b/‎README.md‎
Lines changed: 18 additions & 2 deletions
diff --git a/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions b/‎pyproject.toml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎rfdetr/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎rfdetr/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎rfdetr/config.py‎
Lines changed: 38 additions & 2 deletions b/‎rfdetr/config.py‎
Lines changed: 38 additions & 2 deletions
diff --git a/‎rfdetr/datasets/coco.py‎
Lines changed: 48 additions & 23 deletions b/‎rfdetr/datasets/coco.py‎
Lines changed: 48 additions & 23 deletions
diff --git a/‎rfdetr/detr.py‎
Lines changed: 81 additions & 2 deletions b/‎rfdetr/detr.py‎
Lines changed: 81 additions & 2 deletions
@@ -38,7 +38,7 @@ We validated the performance of RF-DETR on both Microsoft COCO and the RF100-VL
 | LW-DETR-M        | 28.2               | 52.5                           | 84.0                              | 57.5                                  | 6.0                                   |
 | YOLO11m          | 20.0               | 51.5                           | 84.9                              | 59.7                                  | <ins>5.7</ins>                        |
 | YOLOv8m          | 28.9               | 50.6                           | 85.0                              | 59.8                                  | 6.3                                   |
-| RF-DETR-B        | 29.0               | 53.3                           | <ins>86.7</ins>                   | <ins>60.3</ins>                       | 6.0                                   |
+| RF-DETR-Medium    | 33.7               | 54.8                           | <ins>86.6</ins>                   | <ins>60.6</ins>                       | <ins>4.31</ins>                                   |
 
 
 <details>
@@ -55,6 +55,7 @@ We validated the performance of RF-DETR on both Microsoft COCO and the RF100-VL
 - `2025/03/20`: We release RF-DETR real-time object detection model. **Code and checkpoint for RF-DETR-large and RF-DETR-base are available.**
 - `2025/04/03`: We release early stopping, gradient checkpointing, metrics saving, training resume, TensorBoard and W&B logging support.
 - `2025/05/16`: We release an 'optimize_for_inference' method which speeds up native PyTorch by up to 2x, depending on platform.
+- `2025/07/23`: We release new SOTA model sizes: RF-DETR-Nano, RF-DETR-Small, RF-DETR-Medium.
 
 ## Installation
 
@@ -79,7 +80,22 @@ pip install git+https://github.com/roboflow/rf-detr.git
 
 ## Inference
 
-The easiest path to deployment is using Roboflow's [Inference](https://github.com/roboflow/inference) package. You can use model's uploaded to Roboflow's platform with Inference's `infer` method:
+The easiest path to deployment is using Roboflow's [Inference](https://github.com/roboflow/inference) package. 
+
+You can upload a model to Roboflow with the `deploy_to_roboflow` method:
+
+```python
+from rfdetr import RFDETRNano
+
+x = RFDETRNano(pretrain_weights="<path/to/pretrain/weights/dir>")
+x.deploy_to_roboflow(
+  workspace="<your-workspace>",
+  project_ids=["<your-project-id>"],
+  api_key="<YOUR_API_KEY>"
+)
+```
+
+These models will be available to use with Inference's `infer` method:
 
 ```python
 import os
 
@@ -58,6 +58,7 @@ dependencies = [
     "pydantic",
     "supervision",
     "matplotlib",
+    "roboflow"
 ]
 
 [project.optional-dependencies]
 
@@ -9,4 +9,4 @@
 if os.environ.get("PYTORCH_ENABLE_MPS_FALLBACK") is None:
     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
-from rfdetr.detr import RFDETRBase, RFDETRLarge
+from rfdetr.detr import RFDETRBase, RFDETRLarge, RFDETRNano, RFDETRSmall, RFDETRMedium
@@ -13,10 +13,12 @@
 class ModelConfig(BaseModel):
     encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"]
     out_feature_indexes: List[int]
-    dec_layers: int = 3
+    dec_layers: int
     two_stage: bool = True
     projector_scale: List[Literal["P3", "P4", "P5"]]
     hidden_dim: int
+    patch_size: int
+    num_windows: int
     sa_nheads: int
     ca_nheads: int
     dec_n_points: int
@@ -27,13 +29,17 @@ class ModelConfig(BaseModel):
     num_classes: int = 90
     pretrain_weights: Optional[str] = None
     device: Literal["cpu", "cuda", "mps"] = DEVICE
-    resolution: int = 560
+    resolution: int
     group_detr: int = 13
     gradient_checkpointing: bool = False
+    positional_encoding_size: int
 
 class RFDETRBaseConfig(ModelConfig):
     encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_small"
     hidden_dim: int = 256
+    patch_size: int = 14
+    num_windows: int = 4
+    dec_layers: int = 3
     sa_nheads: int = 8
     ca_nheads: int = 16
     dec_n_points: int = 2
@@ -42,6 +48,8 @@ class RFDETRBaseConfig(ModelConfig):
     projector_scale: List[Literal["P3", "P4", "P5"]] = ["P4"]
     out_feature_indexes: List[int] = [2, 5, 8, 11]
     pretrain_weights: Optional[str] = "rf-detr-base.pth"
+    resolution: int = 560
+    positional_encoding_size: int = 37
 
 class RFDETRLargeConfig(RFDETRBaseConfig):
     encoder: Literal["dinov2_windowed_small", "dinov2_windowed_base"] = "dinov2_windowed_base"
@@ -52,6 +60,33 @@ class RFDETRLargeConfig(RFDETRBaseConfig):
     projector_scale: List[Literal["P3", "P4", "P5"]] = ["P3", "P5"]
     pretrain_weights: Optional[str] = "rf-detr-large.pth"
 
+class RFDETRNanoConfig(RFDETRBaseConfig):
+    out_feature_indexes: List[int] = [3, 6, 9, 12]
+    num_windows: int = 2
+    dec_layers: int = 2
+    patch_size: int = 16
+    resolution: int = 384
+    positional_encoding_size: int = 24
+    pretrain_weights: Optional[str] = "rf-detr-nano.pth"
+
+class RFDETRSmallConfig(RFDETRBaseConfig):
+    out_feature_indexes: List[int] = [3, 6, 9, 12]
+    num_windows: int = 2
+    dec_layers: int = 3
+    patch_size: int = 16
+    resolution: int = 512
+    positional_encoding_size: int = 32
+    pretrain_weights: Optional[str] = "rf-detr-small.pth"
+
+class RFDETRMediumConfig(RFDETRBaseConfig):
+    out_feature_indexes: List[int] = [3, 6, 9, 12]
+    num_windows: int = 2
+    dec_layers: int = 4
+    patch_size: int = 16
+    resolution: int = 576
+    positional_encoding_size: int = 36
+    pretrain_weights: Optional[str] = "rf-detr-medium.pth"
+
 class TrainConfig(BaseModel):
     lr: float = 1e-4
     lr_encoder: float = 1.5e-4
@@ -76,6 +111,7 @@ class TrainConfig(BaseModel):
     output_dir: str = "output"
     multi_scale: bool = True
     expanded_scales: bool = True
+    do_random_resize_via_padding: bool = False
     use_ema: bool = True
     num_workers: int = 2
     weight_decay: float = 1e-4
 
@@ -27,24 +27,13 @@
 import rfdetr.datasets.transforms as T
 
 
-def compute_multi_scale_scales(resolution, expanded_scales=False):
-    if resolution == 640:
-        # assume we're doing the original 640x640 and therefore patch_size is 16
-        patch_size = 16
-    elif resolution % (14 * 4) == 0:
-        # assume we're doing some dinov2 resolution variant and therefore patch_size is 14
-        patch_size = 14
-    elif resolution % (16 * 4) == 0:
-        # assume we're doing some other resolution and therefore patch_size is 16
-        patch_size = 16
-    else:
-        raise ValueError(f"Resolution {resolution} is not divisible by 16*4 or 14*4")
+def compute_multi_scale_scales(resolution, expanded_scales=False, patch_size=16, num_windows=4):
     # round to the nearest multiple of 4*patch_size to enable both patching and windowing
-    base_num_patches_per_window = resolution // (patch_size * 4)
+    base_num_patches_per_window = resolution // (patch_size * num_windows)
     offsets = [-3, -2, -1, 0, 1, 2, 3, 4] if not expanded_scales else [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]
     scales = [base_num_patches_per_window + offset for offset in offsets]
-    proposed_scales = [scale * patch_size * 4 for scale in scales]
-    proposed_scales = [scale for scale in proposed_scales if scale >= patch_size * 4]  # ensure minimum image size
+    proposed_scales = [scale * patch_size * num_windows for scale in scales]
+    proposed_scales = [scale for scale in proposed_scales if scale >= patch_size * num_windows * 2]  # ensure minimum image size
     return proposed_scales
 
 
@@ -107,7 +96,7 @@ def __call__(self, image, target):
         return image, target
 
 
-def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scales=False):
+def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scales=False, skip_random_resize=False, patch_size=16, num_windows=4):
 
     normalize = T.Compose([
         T.ToTensor(),
@@ -117,7 +106,9 @@ def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scal
     scales = [resolution]
     if multi_scale:
         # scales = [448, 512, 576, 640, 704, 768, 832, 896]
-        scales = compute_multi_scale_scales(resolution, expanded_scales)
+        scales = compute_multi_scale_scales(resolution, expanded_scales, patch_size, num_windows)
+        if skip_random_resize:
+            scales = [scales[-1]]
         print(scales)
 
     if image_set == 'train':
@@ -148,7 +139,7 @@ def make_coco_transforms(image_set, resolution, multi_scale=False, expanded_scal
     raise ValueError(f'unknown {image_set}')
 
 
-def make_coco_transforms_square_div_64(image_set, resolution, multi_scale=False, expanded_scales=False):
+def make_coco_transforms_square_div_64(image_set, resolution, multi_scale=False, expanded_scales=False, skip_random_resize=False, patch_size=16, num_windows=4):
     """
     """
 
@@ -161,7 +152,9 @@ def make_coco_transforms_square_div_64(image_set, resolution, multi_scale=False,
     scales = [resolution]
     if multi_scale:
         # scales = [448, 512, 576, 640, 704, 768, 832, 896]
-        scales = compute_multi_scale_scales(resolution, expanded_scales)
+        scales = compute_multi_scale_scales(resolution, expanded_scales, patch_size, num_windows)
+        if skip_random_resize:
+            scales = [scales[-1]]
         print(scales)
 
     if image_set == 'train':
@@ -220,9 +213,25 @@ def build(image_set, args, resolution):
 
 
     if square_resize_div_64:
-        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms_square_div_64(image_set, resolution, multi_scale=args.multi_scale, expanded_scales=args.expanded_scales))
+        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms_square_div_64(
+            image_set,
+            resolution,
+            multi_scale=args.multi_scale,
+            expanded_scales=args.expanded_scales,
+            skip_random_resize=not args.do_random_resize_via_padding,
+            patch_size=args.patch_size,
+            num_windows=args.num_windows
+        ))
     else:
-        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(image_set, resolution, multi_scale=args.multi_scale, expanded_scales=args.expanded_scales))
+        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(
+            image_set,
+            resolution,
+            multi_scale=args.multi_scale,
+            expanded_scales=args.expanded_scales,
+            skip_random_resize=not args.do_random_resize_via_padding,
+            patch_size=args.patch_size,
+            num_windows=args.num_windows
+        ))
     return dataset
 
 def build_roboflow(image_set, args, resolution):
@@ -249,7 +258,23 @@ def build_roboflow(image_set, args, resolution):
 
 
     if square_resize_div_64:
-        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms_square_div_64(image_set, resolution, multi_scale=args.multi_scale))
+        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms_square_div_64(
+            image_set,
+            resolution,
+            multi_scale=args.multi_scale,
+            expanded_scales=args.expanded_scales,
+            skip_random_resize=not args.do_random_resize_via_padding,
+            patch_size=args.patch_size,
+            num_windows=args.num_windows
+        ))
     else:
-        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(image_set, resolution, multi_scale=args.multi_scale))
+        dataset = CocoDetection(img_folder, ann_file, transforms=make_coco_transforms(
+            image_set,
+            resolution,
+            multi_scale=args.multi_scale,
+            expanded_scales=args.expanded_scales,
+            skip_random_resize=not args.do_random_resize_via_padding,
+            patch_size=args.patch_size,
+            num_windows=args.num_windows
+        ))
     return dataset
@@ -23,7 +23,15 @@
 except:
     pass
 
-from rfdetr.config import RFDETRBaseConfig, RFDETRLargeConfig, TrainConfig, ModelConfig
+from rfdetr.config import (
+    RFDETRBaseConfig,
+    RFDETRLargeConfig,
+    RFDETRNanoConfig,
+    RFDETRSmallConfig,
+    RFDETRMediumConfig,
+    TrainConfig,
+    ModelConfig
+)
 from rfdetr.main import Model, download_pretrain_weights
 from rfdetr.util.metrics import MetricsPlotSink, MetricsTensorBoardSink, MetricsWandBSink
 from rfdetr.util.coco_classes import COCO_CLASSES
@@ -32,6 +40,7 @@
 class RFDETR:
     means = [0.485, 0.456, 0.406]
     stds = [0.229, 0.224, 0.225]
+    size = None
 
     def __init__(self, **kwargs):
         self.model_config = self.get_model_config(**kwargs)
@@ -324,12 +333,48 @@ def predict(
             detections_list.append(detections)
 
         return detections_list if len(detections_list) > 1 else detections_list[0]
+    
+    def deploy_to_roboflow(self, workspace: str, project_ids: List[str], api_key: str = None, size: str = None, model_name: str = None):
+        from roboflow import Roboflow
+        import shutil
+        if api_key is None:
+            api_key = os.getenv("ROBOFLOW_API_KEY")
+            if api_key is None:
+                raise ValueError("Set api_key=<KEY> in deploy_to_roboflow or export ROBOFLOW_API_KEY=<KEY>")
+
+
+        rf = Roboflow(api_key=api_key)
+        workspace = rf.workspace(workspace)
+
+        if self.size is None and size is None:
+            raise ValueError("Must set size for custom architectures")
+
+        size = self.size or size
+        tmp_out_dir = ".roboflow_temp_upload"
+        os.makedirs(tmp_out_dir, exist_ok=True)
+        outpath = os.path.join(tmp_out_dir, "weights.pth")
+        torch.save(
+            {
+                "model": self.model.model,
+                "args": self.model.args
+            }, outpath
+        )
+
+        out = workspace.deploy_model(
+            model_type=size,
+            model_path=tmp_out_dir,
+            project_ids=project_ids,
+            model_name=model_name or size + "-uploaded"
+        )
+        return out
+
 
 
 class RFDETRBase(RFDETR):
     """
     Train an RF-DETR Base model (29M parameters).
     """
+    size = "rfdetr-base"
     def get_model_config(self, **kwargs):
         return RFDETRBaseConfig(**kwargs)
 
@@ -338,10 +383,44 @@ def get_train_config(self, **kwargs):
 
 class RFDETRLarge(RFDETR):
     """
-    Train an RF-DETR Base model.
+    Train an RF-DETR Large model.
     """
+    size = "rfdetr-large"
     def get_model_config(self, **kwargs):
         return RFDETRLargeConfig(**kwargs)
 
     def get_train_config(self, **kwargs):
         return TrainConfig(**kwargs)
+
+class RFDETRNano(RFDETR):
+    """
+    Train an RF-DETR Nano model.
+    """
+    size = "rfdetr-nano"
+    def get_model_config(self, **kwargs):
+        return RFDETRNanoConfig(**kwargs)
+
+    def get_train_config(self, **kwargs):
+        return TrainConfig(**kwargs)
+
+class RFDETRSmall(RFDETR):
+    """
+    Train an RF-DETR Small model.
+    """
+    size = "rfdetr-small"
+    def get_model_config(self, **kwargs):
+        return RFDETRSmallConfig(**kwargs)
+
+    def get_train_config(self, **kwargs):
+        return TrainConfig(**kwargs)
+
+class RFDETRMedium(RFDETR):
+    """
+    Train an RF-DETR Medium model.
+    """
+    size = "rfdetr-medium"
+    def get_model_config(self, **kwargs):
+        return RFDETRMediumConfig(**kwargs)
+
+    def get_train_config(self, **kwargs):
+        return TrainConfig(**kwargs)
Original file line number	Diff line number	Diff line change
`@@ -58,6 +58,7 @@ dependencies = [`
`58`	`58`	`"pydantic",`
`59`	`59`	`"supervision",`
`60`	`60`	`"matplotlib",`
	`61`	`+ "roboflow"`
`61`	`62`	`]`
`62`	`63`
`63`	`64`	`[project.optional-dependencies]`