Merge pull request #367 from roboflow/sb/debug-import

shntu · web-flow · commit f40a6535f9b4 · 2025-03-25T10:33:03.000-04:00
Fix handling of invalid annotation descriptions and add tests to dataset uploads
diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py
@@ -360,7 +360,7 @@ def _save_annotation(image_id, imagedesc):
             annotation_path = None
 
             annotationdesc = imagedesc.get("annotationfile")
-            if annotationdesc:
+            if isinstance(annotationdesc, dict):
                 if annotationdesc.get("rawText"):
                     annotation_path = annotationdesc
                 else:
@@ -369,8 +369,7 @@ def _save_annotation(image_id, imagedesc):
 
                 if isinstance(labelmap, str):
                     labelmap = load_labelmap(labelmap)
-
-            if not annotation_path:
+            else:
                 return None, None
 
             annotation, upload_time, _retry_attempts = project.save_annotation(
diff --git a/tests/test_project.py b/tests/test_project.py
@@ -1,14 +1,81 @@
+from unittest.mock import patch
+
 import requests
 import responses
 from responses.matchers import json_params_matcher
 
 from roboflow import API_URL
 from roboflow.adapters.rfapi import AnnotationSaveError, ImageUploadError
 from roboflow.config import DEFAULT_BATCH_NAME
-from tests import PROJECT_NAME, ROBOFLOW_API_KEY, WORKSPACE_NAME, RoboflowTest
+from tests import PROJECT_NAME, ROBOFLOW_API_KEY, WORKSPACE_NAME, RoboflowTest, ordered
 
 
 class TestProject(RoboflowTest):
+    def _create_test_dataset(self, images=None):
+        """
+        Create a test dataset with specified images or a default image
+
+        Args:
+            images: List of image dictionaries. If None, a default image will be used.
+
+        Returns:
+            Dictionary representing a parsed dataset
+        """
+        if images is None:
+            images = [{"file": "image1.jpg", "split": "train", "annotationfile": {"file": "image1.xml"}}]
+
+        return {"location": "/test/location/", "images": images}
+
+    def _setup_upload_dataset_mocks(
+        self,
+        test_dataset=None,
+        image_return=None,
+        annotation_return=None,
+        project_created=False,
+        save_annotation_side_effect=None,
+        upload_image_side_effect=None,
+    ):
+        """
+        Set up common mocks for upload_dataset tests
+
+        Args:
+            test_dataset: The dataset to return from parsefolder. If None, creates a default dataset
+            image_return: Return value for upload_image. Default is successful upload
+            annotation_return: Return value for save_annotation. Default is successful annotation
+            project_created: Whether to simulate a newly created project
+            save_annotation_side_effect: Side effect function for save_annotation
+            upload_image_side_effect: Side effect function for upload_image
+
+        Returns:
+            Dictionary of mock objects with start and stop methods
+        """
+        if test_dataset is None:
+            test_dataset = self._create_test_dataset()
+
+        if image_return is None:
+            image_return = ({"id": "test-id", "success": True}, 0.1, 0)
+
+        if annotation_return is None:
+            annotation_return = ({"success": True}, 0.1, 0)
+
+        # Create the mock objects
+        mocks = {
+            "parser": patch("roboflow.core.workspace.folderparser.parsefolder", return_value=test_dataset),
+            "upload": patch("roboflow.core.workspace.Project.upload_image", side_effect=upload_image_side_effect)
+            if upload_image_side_effect
+            else patch("roboflow.core.workspace.Project.upload_image", return_value=image_return),
+            "save_annotation": patch(
+                "roboflow.core.workspace.Project.save_annotation", side_effect=save_annotation_side_effect
+            )
+            if save_annotation_side_effect
+            else patch("roboflow.core.workspace.Project.save_annotation", return_value=annotation_return),
+            "get_project": patch(
+                "roboflow.core.workspace.Workspace._get_or_create_project", return_value=(self.project, project_created)
+            ),
+        }
+
+        return mocks
+
     def test_check_valid_image_with_accepted_formats(self):
         images_to_test = [
             "rabbit.JPG",
@@ -224,3 +291,166 @@ def test_create_annotation_job_error(self):
             )
 
         self.assertEqual(str(context.exception), "Batch not found")
+
+    @ordered
+    @responses.activate
+    def test_project_upload_dataset(self):
+        """Test upload_dataset functionality with various scenarios"""
+        test_scenarios = [
+            {
+                "name": "string_annotationdesc",
+                "dataset": [{"file": "test_image.jpg", "split": "train", "annotationfile": "string_annotation.txt"}],
+                "params": {"num_workers": 1},
+                "assertions": {},
+            },
+            {
+                "name": "success_basic",
+                "dataset": [
+                    {"file": "image1.jpg", "split": "train", "annotationfile": {"file": "image1.xml"}},
+                    {"file": "image2.jpg", "split": "valid", "annotationfile": {"file": "image2.xml"}},
+                ],
+                "params": {},
+                "assertions": {"parser": [("/test/dataset",)], "upload": {"count": 2}, "save_annotation": {"count": 2}},
+                "image_return": ({"id": "test-id-1", "success": True}, 0.1, 0),
+            },
+            {
+                "name": "custom_parameters",
+                "dataset": None,
+                "params": {
+                    "num_workers": 2,
+                    "project_license": "CC BY 4.0",
+                    "project_type": "classification",
+                    "batch_name": "test-batch",
+                    "num_retries": 3,
+                },
+                "assertions": {"upload": {"count": 1, "kwargs": {"batch_name": "test-batch", "num_retry_uploads": 3}}},
+            },
+            {
+                "name": "project_creation",
+                "dataset": None,
+                "params": {"project_name": "new-project"},
+                "assertions": {},
+                "project_created": True,
+            },
+            {
+                "name": "with_labelmap",
+                "dataset": [
+                    {
+                        "file": "image1.jpg",
+                        "split": "train",
+                        "annotationfile": {"file": "image1.xml", "labelmap": "path/to/labelmap.json"},
+                    }
+                ],
+                "params": {},
+                "assertions": {"save_annotation": {"count": 1}, "load_labelmap": {"count": 1}},
+                "extra_mocks": [
+                    (
+                        "load_labelmap",
+                        "roboflow.core.workspace.load_labelmap",
+                        {"return_value": {"old_label": "new_label"}},
+                    )
+                ],
+            },
+            {
+                "name": "concurrent_uploads",
+                "dataset": [{"file": f"image{i}.jpg", "split": "train"} for i in range(10)],
+                "params": {"num_workers": 5},
+                "assertions": {"thread_pool": {"count": 1, "kwargs": {"max_workers": 5}}},
+                "extra_mocks": [("thread_pool", "concurrent.futures.ThreadPoolExecutor", {})],
+            },
+            {"name": "empty_dataset", "dataset": [], "params": {}, "assertions": {"upload": {"count": 0}}},
+            {
+                "name": "raw_text_annotation",
+                "dataset": [
+                    {
+                        "file": "image1.jpg",
+                        "split": "train",
+                        "annotationfile": {"rawText": "annotation content here", "format": "json"},
+                    }
+                ],
+                "params": {},
+                "assertions": {"save_annotation": {"count": 1}},
+            },
+        ]
+
+        error_cases = [
+            {
+                "name": "image_upload_error",
+                "side_effect": {
+                    "upload_image_side_effect": lambda *args, **kwargs: (_ for _ in ()).throw(
+                        ImageUploadError("Failed to upload image")
+                    )
+                },
+                "params": {"num_workers": 1},
+            },
+            {
+                "name": "annotation_upload_error",
+                "side_effect": {
+                    "save_annotation_side_effect": lambda *args, **kwargs: (_ for _ in ()).throw(
+                        AnnotationSaveError("Failed to save annotation")
+                    )
+                },
+                "params": {"num_workers": 1},
+            },
+        ]
+
+        for scenario in test_scenarios:
+            test_dataset = (
+                self._create_test_dataset(scenario.get("dataset")) if scenario.get("dataset") is not None else None
+            )
+
+            extra_mocks = {}
+            if "extra_mocks" in scenario:
+                for mock_name, target, config in scenario.get("extra_mocks", []):
+                    extra_mocks[mock_name] = patch(target, **config)
+
+            mocks = self._setup_upload_dataset_mocks(
+                test_dataset=test_dataset,
+                image_return=scenario.get("image_return"),
+                project_created=scenario.get("project_created", False),
+            )
+
+            mock_objects = {}
+            for name, mock in mocks.items():
+                mock_objects[name] = mock.start()
+
+            for name, mock in extra_mocks.items():
+                mock_objects[name] = mock.start()
+
+            try:
+                params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
+                params.update(scenario.get("params", {}))
+
+                self.workspace.upload_dataset(**params)
+
+                for mock_name, assertion in scenario.get("assertions", {}).items():
+                    if isinstance(assertion, list):
+                        mock_obj = mock_objects.get(mock_name)
+                        call_args_list = [args for args, _ in mock_obj.call_args_list]
+                        for expected_args in assertion:
+                            self.assertIn(expected_args, call_args_list)
+                    elif isinstance(assertion, dict):
+                        mock_obj = mock_objects.get(mock_name)
+                        if "count" in assertion:
+                            self.assertEqual(mock_obj.call_count, assertion["count"])
+                        if "kwargs" in assertion and mock_obj.call_count > 0:
+                            _, kwargs = mock_obj.call_args
+                            for key, value in assertion["kwargs"].items():
+                                self.assertEqual(kwargs.get(key), value)
+            finally:
+                for mock in list(mocks.values()) + list(extra_mocks.values()):
+                    mock.stop()
+
+        for case in error_cases:
+            mocks = self._setup_upload_dataset_mocks(**case.get("side_effect", {}))
+
+            for mock in mocks.values():
+                mock.start()
+
+            try:
+                params = {"dataset_path": "/test/dataset", "project_name": PROJECT_NAME}
+                params.update(case.get("params", {}))
+                self.workspace.upload_dataset(**params)
+            finally:
+                for mock in mocks.values():
+                    mock.stop()