1010import pytest
1111
1212from tests .utils import generate_sample_embeddings_for_run
13- from timdex_dataset_api import TIMDEXDataset
1413from timdex_dataset_api .embeddings import (
1514 METADATA_SELECT_FILTER_COLUMNS ,
1615 TIMDEX_DATASET_EMBEDDINGS_SCHEMA ,
@@ -302,9 +301,7 @@ def test_current_embeddings_view_single_run(timdex_dataset_for_embeddings_views)
302301
303302 # write embeddings for run "apple-1"
304303 td .embeddings .write (generate_sample_embeddings_for_run (td , run_id = "apple-1" ))
305-
306- # NOTE: at time of test creation, this manual reload is required
307- td = TIMDEXDataset (td .location )
304+ td .refresh ()
308305
309306 # query current_embeddings for apple source using read_dataframe
310307 result = td .embeddings .read_dataframe (table = "current_embeddings" , source = "apple" )
@@ -320,9 +317,7 @@ def test_current_embeddings_view_multiple_runs(timdex_dataset_for_embeddings_vie
320317 # write embeddings for runs "orange-1" and "orange-2"
321318 td .embeddings .write (generate_sample_embeddings_for_run (td , run_id = "orange-1" ))
322319 td .embeddings .write (generate_sample_embeddings_for_run (td , run_id = "orange-2" ))
323-
324- # NOTE: at time of test creation, this manual reload is required
325- td = TIMDEXDataset (td .location )
320+ td .refresh ()
326321
327322 # query current_embeddings for orange source using read_dataframe
328323 result = td .embeddings .read_dataframe (table = "current_embeddings" , source = "orange" )
@@ -363,9 +358,7 @@ def test_current_embeddings_view_handles_duplicate_run_embeddings(
363358 td , run_id = "lemon-2" , embedding_timestamp = "2025-08-03T00:00:00+00:00"
364359 )
365360 )
366-
367- # NOTE: at time of test creation, this manual reload is required
368- td = TIMDEXDataset (td .location )
361+ td .refresh ()
369362
370363 # check all embeddings for lemon-2 to verify both writes exist
371364 all_lemon_2 = td .embeddings .read_dataframe (table = "embeddings" , run_id = "lemon-2" )
@@ -416,9 +409,7 @@ def test_embeddings_view_includes_all_embeddings(timdex_dataset_for_embeddings_v
416409 td , run_id = "lemon-2" , embedding_timestamp = "2025-08-03T00:00:00+00:00"
417410 )
418411 )
419-
420- # NOTE: at time of test creation, this manual reload is required
421- td = TIMDEXDataset (td .location )
412+ td .refresh ()
422413
423414 # query all embeddings for lemon source
424415 result = td .embeddings .read_dataframe (table = "embeddings" , source = "lemon" )
@@ -435,3 +426,25 @@ def test_embeddings_view_includes_all_embeddings(timdex_dataset_for_embeddings_v
435426 lemon_2_embeddings = result [result ["run_id" ] == "lemon-2" ]
436427 assert len (lemon_2_embeddings ) == 10 # 5 from each write
437428 assert (lemon_2_embeddings ["run_date" ] == date (2025 , 8 , 2 )).all ()
429+
430+
def test_embeddings_read_batches_iter_returns_empty_when_embeddings_missing(
    timdex_dataset_empty, caplog
):
    """read_batches_iter yields no batches and logs a 'table not found' message.

    Uses the `timdex_dataset_empty` fixture and checks both the empty result
    and the message captured by `caplog`.
    """
    expected_log_message = (
        "Table 'embeddings' not found in DuckDB context. Embeddings may not yet exist "
        "or TIMDEXDataset.refresh() may be required."
    )

    # drain the iterator first so the log record exists before caplog is inspected
    embeddings = timdex_dataset_empty.embeddings
    batches = list(embeddings.read_batches_iter())

    assert batches == []
    assert expected_log_message in caplog.text
440+
441+
def test_embeddings_read_batches_iter_returns_empty_for_invalid_table(
    timdex_embeddings_with_runs, caplog
):
    """read_batches_iter raises ValueError for a nonexistent table name.

    Despite "returns_empty" in the test name, the behavior pinned here is that
    consuming the iterator raises rather than yielding nothing. The `caplog`
    fixture is requested but not inspected by this test.
    """
    with pytest.raises(
        ValueError,
        match="Invalid table: 'nonexistent'",
    ):
        # list() drains the iterator inside the context, in case the error is
        # only raised once iteration starts
        list(timdex_embeddings_with_runs.read_batches_iter(table="nonexistent"))
0 commit comments