Improve error handling and add comprehensive test coverage

Copilot · astafan8 · Copilot · commit 504f2d0d6dbf · 2025-06-10T14:29:45.000Z
Co-authored-by: astafan8 <15662810+astafan8@users.noreply.github.com>
diff --git a/src/qcodes/dataset/database_extract_runs.py b/src/qcodes/dataset/database_extract_runs.py
@@ -218,36 +218,55 @@ def export_datasets_and_create_metadata_db(
     else:
         export_path = Path(export_path)
     
+    # Validate source database exists
+    if not source_db_path.exists():
+        raise FileNotFoundError(f"Source database file not found: {source_db_path}")
+    
     log.info(f"Starting export process from {source_db_path} to {target_db_path}")
     log.info(f"NetCDF files will be exported to {export_path}")
     
     # Check database versions
-    (s_v, new_v) = get_db_version_and_newest_available_version(source_db_path)
-    if s_v < new_v and not upgrade_source_db:
-        warn(
-            f"Source DB version is {s_v}, but this function needs it to be"
-            f" in version {new_v}. Run this function again with "
-            "upgrade_source_db=True to auto-upgrade the source DB file."
-        )
-        return {}
-
-    if target_db_path.exists():
-        (t_v, new_v) = get_db_version_and_newest_available_version(target_db_path)
-        if t_v < new_v and not upgrade_target_db:
+    try:
+        (s_v, new_v) = get_db_version_and_newest_available_version(source_db_path)
+        if s_v < new_v and not upgrade_source_db:
             warn(
-                f"Target DB version is {t_v}, but this function needs it to "
-                f"be in version {new_v}. Run this function again with "
-                "upgrade_target_db=True to auto-upgrade the target DB file."
+                f"Source DB version is {s_v}, but this function needs it to be"
+                f" in version {new_v}. Run this function again with "
+                "upgrade_source_db=True to auto-upgrade the source DB file."
             )
             return {}
+    except Exception as e:
+        log.error(f"Failed to check source database version: {e}")
+        raise
+
+    if target_db_path.exists():
+        try:
+            (t_v, new_v) = get_db_version_and_newest_available_version(target_db_path)
+            if t_v < new_v and not upgrade_target_db:
+                warn(
+                    f"Target DB version is {t_v}, but this function needs it to "
+                    f"be in version {new_v}. Run this function again with "
+                    "upgrade_target_db=True to auto-upgrade the target DB file."
+                )
+                return {}
+        except Exception as e:
+            log.error(f"Failed to check target database version: {e}")
+            raise
 
     # Create export directory if it doesn't exist
-    export_path.mkdir(parents=True, exist_ok=True)
+    try:
+        export_path.mkdir(parents=True, exist_ok=True)
+    except Exception as e:
+        log.error(f"Failed to create export directory {export_path}: {e}")
+        raise
     
-    source_conn = connect(source_db_path)
-    target_conn = connect(target_db_path)
+    source_conn = None
+    target_conn = None
     
     try:
+        source_conn = connect(source_db_path)
+        target_conn = connect(target_db_path)
+        
         # Get all run IDs from the source database
         run_ids = get_runs(source_conn)
         log.info(f"Found {len(run_ids)} datasets to process")
@@ -296,9 +315,14 @@ def export_datasets_and_create_metadata_db(
         log.info(f"Processing complete. Status summary: {result_status}")
         return result_status
         
+    except Exception as e:
+        log.error(f"Database operation failed: {e}")
+        raise
     finally:
-        source_conn.close()
-        target_conn.close()
+        if source_conn is not None:
+            source_conn.close()
+        if target_conn is not None:
+            target_conn.close()
 
 
 def _process_single_dataset(
@@ -331,20 +355,23 @@ def _process_single_dataset(
     try:
         # Try to export to NetCDF
         log.info(f"Attempting to export dataset {run_id} to NetCDF")
-        netcdf_path = dataset.export("netcdf", path=export_path)
+        dataset.export("netcdf", path=export_path)
         
-        if netcdf_path is None:
+        # Check if export was successful by checking export_info
+        netcdf_export_path = dataset.export_info.export_paths.get("nc")
+        if netcdf_export_path is None:
             log.warning(f"Failed to export dataset {run_id} to NetCDF, copying as-is")
             return _copy_dataset_as_is(dataset, target_conn, target_exp_id)
             
-        # Load from NetCDF to create metadata-only dataset
-        log.info(f"Loading dataset {run_id} from NetCDF to create metadata-only version")
-        netcdf_dataset = load_from_netcdf(netcdf_path)
+        log.info(f"Successfully exported dataset {run_id} to {netcdf_export_path}")
+        
+        # Create metadata-only version by copying dataset structure without raw data
+        log.info(f"Creating metadata-only version of dataset {run_id}")
         
-        # Insert metadata-only version into target database
         with atomic(target_conn) as target_conn_atomic:
+            # Add run metadata to runs table, preserving original captured_run_id
             _, _, target_table_name = _add_run_to_runs_table(
-                netcdf_dataset, target_conn_atomic, target_exp_id
+                dataset, target_conn_atomic, target_exp_id
             )
             
             # Note: We deliberately don't populate the results table to keep only metadata
diff --git a/tests/dataset/test_export_datasets_and_create_metadata_db.py b/tests/dataset/test_export_datasets_and_create_metadata_db.py
@@ -245,6 +245,94 @@ def test_export_datasets_default_export_path(simple_dataset):
         assert run_id in result
 
 
+def test_export_datasets_handles_export_failure():
+    """Check that a dataset with a text-typed setpoint is either exported or copied as-is"""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        source_db_path = Path(temp_dir) / "source.db"
+        target_db_path = Path(temp_dir) / "target.db"
+        export_path = Path(temp_dir) / "exports"
+        
+        # Create source database
+        source_conn = connect(source_db_path)
+        exp = load_or_create_experiment(
+            experiment_name="test_exp",
+            sample_name="test_sample",
+            conn=source_conn
+        )
+        
+        # Interdependencies: numeric y depends on a text-typed x
+        x = ParamSpec("x", "text", unit="")  # Text data might be harder to export
+        y = ParamSpec("y", "numeric", unit="A")
+        interdeps = InterDependencies_(dependencies={y: (x,)})
+        
+        # Create dataset with mixed data types
+        dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
+        dataset.set_interdependencies(interdeps)
+        dataset.mark_started()
+        
+        # Add three rows pairing a string setpoint with a numeric value
+        for i in range(3):
+            dataset.add_results([{"x": f"text_{i}", "y": i**2}])
+        
+        dataset.mark_completed()
+        source_conn.close()
+        
+        # Run the export function
+        result = export_datasets_and_create_metadata_db(
+            source_db_path=source_db_path,
+            target_db_path=target_db_path,
+            export_path=export_path,
+        )
+        
+        # Exactly one run is expected in the status dict, keyed by its run_id
+        assert len(result) == 1
+        assert dataset.run_id in result
+        # Either outcome is accepted; this test does not force an export failure
+        assert result[dataset.run_id] in ["exported", "copied_as_is"]
+
+
+def test_export_datasets_nonexistent_source():
+    """A source database path that does not exist must raise FileNotFoundError"""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        source_db_path = Path(temp_dir) / "nonexistent.db"
+        target_db_path = Path(temp_dir) / "target.db"
+        export_path = Path(temp_dir) / "exports"
+        
+        # Pin the explicit source-existence check rather than accepting any OSError
+        with pytest.raises(FileNotFoundError, match="Source database file not found"):
+            export_datasets_and_create_metadata_db(
+                source_db_path=source_db_path,
+                target_db_path=target_db_path,
+                export_path=export_path,
+            )
+
+
+def test_export_datasets_readonly_target(simple_dataset):
+    """Test behavior when the target database's parent directory is read-only"""
+    source_db_path, run_id = simple_dataset  # fixture must be requested via the signature
+    
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create a read-only directory for target
+        readonly_dir = Path(temp_dir) / "readonly"
+        readonly_dir.mkdir()
+        readonly_dir.chmod(0o444)  # Read-only
+        
+        try:
+            target_db_path = readonly_dir / "target.db"
+            export_path = Path(temp_dir) / "exports"
+            
+            # The permission problem is expected to surface as an OSError
+            with pytest.raises((PermissionError, OSError)):
+                export_datasets_and_create_metadata_db(
+                    source_db_path=source_db_path,
+                    target_db_path=target_db_path,
+                    export_path=export_path,
+                )
+        finally:
+            # Restore permissions so TemporaryDirectory can clean up
+            readonly_dir.chmod(0o755)
+
+
 @pytest.mark.parametrize(
     "upgrade_source,upgrade_target",
     [
@@ -273,4 +361,123 @@ def test_export_datasets_upgrade_flags(simple_dataset, upgrade_source, upgrade_t
         
         # Function should complete successfully regardless of upgrade flags
         # (assuming databases are already current version)
-        assert isinstance(result, dict)
+        assert isinstance(result, dict)
+
+
+def test_export_datasets_large_dataset_scenario():
+    """Check that five datasets (four completed, one incomplete) are all processed"""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        source_db_path = Path(temp_dir) / "source.db"
+        target_db_path = Path(temp_dir) / "target.db"
+        export_path = Path(temp_dir) / "exports"
+        
+        # Create source database
+        source_conn = connect(source_db_path)
+        exp = load_or_create_experiment(
+            experiment_name="test_exp",
+            sample_name="test_sample",
+            conn=source_conn
+        )
+        
+        # Interdependencies: numeric y depends on numeric x
+        x = ParamSpec("x", "numeric", unit="V")
+        y = ParamSpec("y", "numeric", unit="A")
+        interdeps = InterDependencies_(dependencies={y: (x,)})
+        
+        created_datasets = []
+        
+        # Create five datasets that differ in row count and completion state
+        for i in range(5):
+            dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
+            dataset.set_interdependencies(interdeps)
+            dataset.mark_started()
+            
+            # Add varying amounts of data
+            for j in range(i + 1):  # 1, 2, 3, 4, 5 data points respectively
+                dataset.add_results([{"x": j, "y": j * (i + 1)}])
+            
+            if i < 4:  # Leave the last dataset (i == 4) incomplete
+                dataset.mark_completed()
+            
+            created_datasets.append(dataset)
+        
+        source_conn.close()
+        
+        # Run the export function
+        result = export_datasets_and_create_metadata_db(
+            source_db_path=source_db_path,
+            target_db_path=target_db_path,
+            export_path=export_path,
+        )
+        
+        # Every dataset must have a status entry
+        assert len(result) == 5
+        
+        # Check that target database has all runs
+        target_conn = connect(target_db_path)
+        target_runs = get_runs(target_conn)
+        target_conn.close()
+        
+        assert len(target_runs) == 5
+        
+        # The incomplete dataset should be copied as-is
+        incomplete_dataset = created_datasets[-1]
+        assert result[incomplete_dataset.run_id] == "copied_as_is"
+
+
+def test_export_datasets_status_reporting():
+    """Check the per-run status dict and that a second run reports already_exists"""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        source_db_path = Path(temp_dir) / "source.db"
+        target_db_path = Path(temp_dir) / "target.db"
+        export_path = Path(temp_dir) / "exports"
+        
+        # Create source database with a completed dataset
+        source_conn = connect(source_db_path)
+        exp = load_or_create_experiment(
+            experiment_name="test_exp",
+            sample_name="test_sample",
+            conn=source_conn
+        )
+        
+        # Interdependencies: numeric y depends on numeric x
+        x = ParamSpec("x", "numeric", unit="V")
+        y = ParamSpec("y", "numeric", unit="A")
+        interdeps = InterDependencies_(dependencies={y: (x,)})
+        
+        # Create and complete a dataset
+        dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
+        dataset.set_interdependencies(interdeps)
+        dataset.mark_started()
+        
+        for i in range(5):
+            dataset.add_results([{"x": i, "y": i**2}])
+        
+        dataset.mark_completed()
+        source_conn.close()
+        
+        # First run: target database does not exist yet
+        result = export_datasets_and_create_metadata_db(
+            source_db_path=source_db_path,
+            target_db_path=target_db_path,
+            export_path=export_path,
+        )
+        
+        # The result maps the single run_id to a status string
+        assert isinstance(result, dict)
+        assert len(result) == 1
+        assert dataset.run_id in result
+        
+        # Status should be one of the expected values
+        status = result[dataset.run_id]
+        expected_statuses = ["exported", "copied_as_is", "already_exists"]
+        assert status in expected_statuses, f"Unexpected status: {status}"
+        
+        # Second run against the same target: the run must be reported as already_exists
+        result2 = export_datasets_and_create_metadata_db(
+            source_db_path=source_db_path,
+            target_db_path=target_db_path,
+            export_path=export_path,
+        )
+        
+        assert result2[dataset.run_id] == "already_exists"