Skip to content

Commit 504f2d0

Browse files
Copilotastafan8
andcommitted
Improve error handling and add comprehensive test coverage
Co-authored-by: astafan8 <15662810+astafan8@users.noreply.github.com>
1 parent 8b60dc8 commit 504f2d0

2 files changed

Lines changed: 262 additions & 28 deletions

File tree

src/qcodes/dataset/database_extract_runs.py

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -218,36 +218,55 @@ def export_datasets_and_create_metadata_db(
218218
else:
219219
export_path = Path(export_path)
220220

221+
# Validate source database exists
222+
if not source_db_path.exists():
223+
raise FileNotFoundError(f"Source database file not found: {source_db_path}")
224+
221225
log.info(f"Starting export process from {source_db_path} to {target_db_path}")
222226
log.info(f"NetCDF files will be exported to {export_path}")
223227

224228
# Check database versions
225-
(s_v, new_v) = get_db_version_and_newest_available_version(source_db_path)
226-
if s_v < new_v and not upgrade_source_db:
227-
warn(
228-
f"Source DB version is {s_v}, but this function needs it to be"
229-
f" in version {new_v}. Run this function again with "
230-
"upgrade_source_db=True to auto-upgrade the source DB file."
231-
)
232-
return {}
233-
234-
if target_db_path.exists():
235-
(t_v, new_v) = get_db_version_and_newest_available_version(target_db_path)
236-
if t_v < new_v and not upgrade_target_db:
229+
try:
230+
(s_v, new_v) = get_db_version_and_newest_available_version(source_db_path)
231+
if s_v < new_v and not upgrade_source_db:
237232
warn(
238-
f"Target DB version is {t_v}, but this function needs it to "
239-
f"be in version {new_v}. Run this function again with "
240-
"upgrade_target_db=True to auto-upgrade the target DB file."
233+
f"Source DB version is {s_v}, but this function needs it to be"
234+
f" in version {new_v}. Run this function again with "
235+
"upgrade_source_db=True to auto-upgrade the source DB file."
241236
)
242237
return {}
238+
except Exception as e:
239+
log.error(f"Failed to check source database version: {e}")
240+
raise
241+
242+
if target_db_path.exists():
243+
try:
244+
(t_v, new_v) = get_db_version_and_newest_available_version(target_db_path)
245+
if t_v < new_v and not upgrade_target_db:
246+
warn(
247+
f"Target DB version is {t_v}, but this function needs it to "
248+
f"be in version {new_v}. Run this function again with "
249+
"upgrade_target_db=True to auto-upgrade the target DB file."
250+
)
251+
return {}
252+
except Exception as e:
253+
log.error(f"Failed to check target database version: {e}")
254+
raise
243255

244256
# Create export directory if it doesn't exist
245-
export_path.mkdir(parents=True, exist_ok=True)
257+
try:
258+
export_path.mkdir(parents=True, exist_ok=True)
259+
except Exception as e:
260+
log.error(f"Failed to create export directory {export_path}: {e}")
261+
raise
246262

247-
source_conn = connect(source_db_path)
248-
target_conn = connect(target_db_path)
263+
source_conn = None
264+
target_conn = None
249265

250266
try:
267+
source_conn = connect(source_db_path)
268+
target_conn = connect(target_db_path)
269+
251270
# Get all run IDs from the source database
252271
run_ids = get_runs(source_conn)
253272
log.info(f"Found {len(run_ids)} datasets to process")
@@ -296,9 +315,14 @@ def export_datasets_and_create_metadata_db(
296315
log.info(f"Processing complete. Status summary: {result_status}")
297316
return result_status
298317

318+
except Exception as e:
319+
log.error(f"Database operation failed: {e}")
320+
raise
299321
finally:
300-
source_conn.close()
301-
target_conn.close()
322+
if source_conn is not None:
323+
source_conn.close()
324+
if target_conn is not None:
325+
target_conn.close()
302326

303327

304328
def _process_single_dataset(
@@ -331,20 +355,23 @@ def _process_single_dataset(
331355
try:
332356
# Try to export to NetCDF
333357
log.info(f"Attempting to export dataset {run_id} to NetCDF")
334-
netcdf_path = dataset.export("netcdf", path=export_path)
358+
dataset.export("netcdf", path=export_path)
335359

336-
if netcdf_path is None:
360+
# Check if export was successful by checking export_info
361+
netcdf_export_path = dataset.export_info.export_paths.get("nc")
362+
if netcdf_export_path is None:
337363
log.warning(f"Failed to export dataset {run_id} to NetCDF, copying as-is")
338364
return _copy_dataset_as_is(dataset, target_conn, target_exp_id)
339365

340-
# Load from NetCDF to create metadata-only dataset
341-
log.info(f"Loading dataset {run_id} from NetCDF to create metadata-only version")
342-
netcdf_dataset = load_from_netcdf(netcdf_path)
366+
log.info(f"Successfully exported dataset {run_id} to {netcdf_export_path}")
367+
368+
# Create metadata-only version by copying dataset structure without raw data
369+
log.info(f"Creating metadata-only version of dataset {run_id}")
343370

344-
# Insert metadata-only version into target database
345371
with atomic(target_conn) as target_conn_atomic:
372+
# Add run metadata to runs table, preserving original captured_run_id
346373
_, _, target_table_name = _add_run_to_runs_table(
347-
netcdf_dataset, target_conn_atomic, target_exp_id
374+
dataset, target_conn_atomic, target_exp_id
348375
)
349376

350377
# Note: We deliberately don't populate the results table to keep only metadata

tests/dataset/test_export_datasets_and_create_metadata_db.py

Lines changed: 208 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,94 @@ def test_export_datasets_default_export_path(simple_dataset):
245245
assert run_id in result
246246

247247

248+
def test_export_datasets_handles_export_failure():
249+
"""Test that the function handles export failures gracefully"""
250+
with tempfile.TemporaryDirectory() as temp_dir:
251+
source_db_path = Path(temp_dir) / "source.db"
252+
target_db_path = Path(temp_dir) / "target.db"
253+
export_path = Path(temp_dir) / "exports"
254+
255+
# Create source database
256+
source_conn = connect(source_db_path)
257+
exp = load_or_create_experiment(
258+
experiment_name="test_exp",
259+
sample_name="test_sample",
260+
conn=source_conn
261+
)
262+
263+
# Create interdependencies with problematic data that might fail export
264+
x = ParamSpec("x", "text", unit="") # Text data might be harder to export
265+
y = ParamSpec("y", "numeric", unit="A")
266+
interdeps = InterDependencies_(dependencies={y: (x,)})
267+
268+
# Create dataset with mixed data types
269+
dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
270+
dataset.set_interdependencies(interdeps)
271+
dataset.mark_started()
272+
273+
# Add some data that might be challenging to export
274+
for i in range(3):
275+
dataset.add_results([{"x": f"text_{i}", "y": i**2}])
276+
277+
dataset.mark_completed()
278+
source_conn.close()
279+
280+
# Run the export function
281+
result = export_datasets_and_create_metadata_db(
282+
source_db_path=source_db_path,
283+
target_db_path=target_db_path,
284+
export_path=export_path,
285+
)
286+
287+
# Should handle the dataset one way or another
288+
assert len(result) == 1
289+
assert dataset.run_id in result
290+
# Should either export or copy as-is
291+
assert result[dataset.run_id] in ["exported", "copied_as_is"]
292+
293+
294+
def test_export_datasets_nonexistent_source():
295+
"""Test behavior with non-existent source database"""
296+
with tempfile.TemporaryDirectory() as temp_dir:
297+
source_db_path = Path(temp_dir) / "nonexistent.db"
298+
target_db_path = Path(temp_dir) / "target.db"
299+
export_path = Path(temp_dir) / "exports"
300+
301+
# Should handle non-existent source gracefully
302+
with pytest.raises((FileNotFoundError, OSError)):
303+
export_datasets_and_create_metadata_db(
304+
source_db_path=source_db_path,
305+
target_db_path=target_db_path,
306+
export_path=export_path,
307+
)
308+
309+
310+
def test_export_datasets_readonly_target():
311+
"""Test behavior when target path is not writable"""
312+
source_db_path, run_id = simple_dataset
313+
314+
with tempfile.TemporaryDirectory() as temp_dir:
315+
# Create a read-only directory for target
316+
readonly_dir = Path(temp_dir) / "readonly"
317+
readonly_dir.mkdir()
318+
readonly_dir.chmod(0o444) # Read-only
319+
320+
try:
321+
target_db_path = readonly_dir / "target.db"
322+
export_path = Path(temp_dir) / "exports"
323+
324+
# Should handle permission errors gracefully
325+
with pytest.raises((PermissionError, OSError)):
326+
export_datasets_and_create_metadata_db(
327+
source_db_path=source_db_path,
328+
target_db_path=target_db_path,
329+
export_path=export_path,
330+
)
331+
finally:
332+
# Restore permissions for cleanup
333+
readonly_dir.chmod(0o755)
334+
335+
248336
@pytest.mark.parametrize(
249337
"upgrade_source,upgrade_target",
250338
[
@@ -273,4 +361,123 @@ def test_export_datasets_upgrade_flags(simple_dataset, upgrade_source, upgrade_t
273361

274362
# Function should complete successfully regardless of upgrade flags
275363
# (assuming databases are already current version)
276-
assert isinstance(result, dict)
364+
assert isinstance(result, dict)
365+
366+
367+
def test_export_datasets_large_dataset_scenario():
368+
"""Test handling of a scenario with multiple datasets including edge cases"""
369+
with tempfile.TemporaryDirectory() as temp_dir:
370+
source_db_path = Path(temp_dir) / "source.db"
371+
target_db_path = Path(temp_dir) / "target.db"
372+
export_path = Path(temp_dir) / "exports"
373+
374+
# Create source database
375+
source_conn = connect(source_db_path)
376+
exp = load_or_create_experiment(
377+
experiment_name="test_exp",
378+
sample_name="test_sample",
379+
conn=source_conn
380+
)
381+
382+
# Create interdependencies
383+
x = ParamSpec("x", "numeric", unit="V")
384+
y = ParamSpec("y", "numeric", unit="A")
385+
interdeps = InterDependencies_(dependencies={y: (x,)})
386+
387+
created_datasets = []
388+
389+
# Create several datasets with different characteristics
390+
for i in range(5):
391+
dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
392+
dataset.set_interdependencies(interdeps)
393+
dataset.mark_started()
394+
395+
# Add varying amounts of data
396+
for j in range(i + 1): # 1, 2, 3, 4, 5 data points respectively
397+
dataset.add_results([{"x": j, "y": j * (i + 1)}])
398+
399+
if i < 4: # Leave one dataset incomplete
400+
dataset.mark_completed()
401+
402+
created_datasets.append(dataset)
403+
404+
source_conn.close()
405+
406+
# Run the export function
407+
result = export_datasets_and_create_metadata_db(
408+
source_db_path=source_db_path,
409+
target_db_path=target_db_path,
410+
export_path=export_path,
411+
)
412+
413+
# Check that all datasets were processed
414+
assert len(result) == 5
415+
416+
# Check that target database has all runs
417+
target_conn = connect(target_db_path)
418+
target_runs = get_runs(target_conn)
419+
target_conn.close()
420+
421+
assert len(target_runs) == 5
422+
423+
# The incomplete dataset should be copied as-is
424+
incomplete_dataset = created_datasets[-1]
425+
assert result[incomplete_dataset.run_id] == "copied_as_is"
426+
427+
428+
def test_export_datasets_status_reporting():
429+
"""Test that the function returns detailed status information"""
430+
with tempfile.TemporaryDirectory() as temp_dir:
431+
source_db_path = Path(temp_dir) / "source.db"
432+
target_db_path = Path(temp_dir) / "target.db"
433+
export_path = Path(temp_dir) / "exports"
434+
435+
# Create source database with a completed dataset
436+
source_conn = connect(source_db_path)
437+
exp = load_or_create_experiment(
438+
experiment_name="test_exp",
439+
sample_name="test_sample",
440+
conn=source_conn
441+
)
442+
443+
# Create interdependencies
444+
x = ParamSpec("x", "numeric", unit="V")
445+
y = ParamSpec("y", "numeric", unit="A")
446+
interdeps = InterDependencies_(dependencies={y: (x,)})
447+
448+
# Create and complete a dataset
449+
dataset = DataSet(conn=source_conn, exp_id=exp.exp_id)
450+
dataset.set_interdependencies(interdeps)
451+
dataset.mark_started()
452+
453+
for i in range(5):
454+
dataset.add_results([{"x": i, "y": i**2}])
455+
456+
dataset.mark_completed()
457+
source_conn.close()
458+
459+
# Run the export function
460+
result = export_datasets_and_create_metadata_db(
461+
source_db_path=source_db_path,
462+
target_db_path=target_db_path,
463+
export_path=export_path,
464+
)
465+
466+
# Check return value structure
467+
assert isinstance(result, dict)
468+
assert len(result) == 1
469+
assert dataset.run_id in result
470+
471+
# Status should be one of the expected values
472+
status = result[dataset.run_id]
473+
expected_statuses = ["exported", "copied_as_is", "already_exists"]
474+
assert status in expected_statuses, f"Unexpected status: {status}"
475+
476+
# If we run again, should report already_exists
477+
result2 = export_datasets_and_create_metadata_db(
478+
source_db_path=source_db_path,
479+
target_db_path=target_db_path,
480+
export_path=export_path,
481+
)
482+
483+
assert result2[dataset.run_id] == "already_exists"

0 commit comments

Comments
 (0)