@@ -987,3 +987,175 @@ def test_dry_run_does_not_call_delete_documents(self, db_session):
987987
988988 # Dry run reports what would be removed
989989 assert stats ["entities_removed" ] == 1
990+
991+
class TestSearchIndexQuery:
    """Test WikidataEntity.search_index_query functionality.

    Rows are seeded through the private ``_create_*`` helpers, which all use
    ``ON CONFLICT DO NOTHING`` inserts so they can be called repeatedly for
    the same ``wikidata_id`` without raising.
    """

    def _register_type(self, db_session, model, wikidata_id):
        """Insert ``wikidata_id`` into ``model``'s table, ignoring duplicates.

        Args:
            db_session: Session used to execute and flush the insert.
            model: Mapped class exposing ``__table__`` with a ``wikidata_id``
                column (e.g. Location, Country, Position).
            wikidata_id: Identifier of the entity to register.
        """
        stmt = insert(model.__table__).values([{"wikidata_id": wikidata_id}])
        stmt = stmt.on_conflict_do_nothing(index_elements=["wikidata_id"])
        db_session.execute(stmt)
        db_session.flush()

    def _create_entity_with_labels(self, db_session, wikidata_id, name, labels):
        """Helper to create a WikidataEntity with labels.

        Args:
            db_session: Session used to execute and flush the inserts.
            wikidata_id: Identifier of the entity row.
            name: Value stored in the entity's ``name`` column.
            labels: Iterable of label strings; one label row is inserted per
                item, skipping ``(entity_id, label)`` pairs that already exist.
        """
        from poliloom.models import WikidataEntityLabel

        entity_stmt = insert(WikidataEntity).values(
            [{"wikidata_id": wikidata_id, "name": name}]
        )
        entity_stmt = entity_stmt.on_conflict_do_nothing(
            index_elements=["wikidata_id"]
        )
        db_session.execute(entity_stmt)

        for label in labels:
            label_stmt = insert(WikidataEntityLabel).values(
                [{"entity_id": wikidata_id, "label": label}]
            )
            label_stmt = label_stmt.on_conflict_do_nothing(
                index_elements=["entity_id", "label"]
            )
            db_session.execute(label_stmt)

        db_session.flush()

    def _create_location(self, db_session, wikidata_id, name, labels):
        """Helper to create a Location entity with labels."""
        from poliloom.models import Location

        self._create_entity_with_labels(db_session, wikidata_id, name, labels)
        self._register_type(db_session, Location, wikidata_id)

    def _create_country(self, db_session, wikidata_id, name, labels):
        """Helper to create a Country entity with labels."""
        from poliloom.models import Country

        self._create_entity_with_labels(db_session, wikidata_id, name, labels)
        self._register_type(db_session, Country, wikidata_id)

    def _create_position(self, db_session, wikidata_id, name, labels):
        """Helper to create a Position entity with labels."""
        from poliloom.models import Position

        self._create_entity_with_labels(db_session, wikidata_id, name, labels)
        self._register_type(db_session, Position, wikidata_id)

    def test_returns_entity_with_single_type(self, db_session):
        """Test query returns entity with single type."""
        self._create_location(db_session, "Q60", "New York City", ["New York", "NYC"])

        query = WikidataEntity.search_index_query()
        results = db_session.execute(query).fetchall()

        assert len(results) == 1
        row = results[0]
        assert row.wikidata_id == "Q60"
        assert "Location" in row.types
        assert set(row.labels) == {"New York", "NYC"}

    def test_returns_entity_with_multiple_types(self, db_session):
        """Test query aggregates multiple types for same entity."""
        from poliloom.models import Location, Country

        # Germany is both a Location and a Country
        self._create_entity_with_labels(
            db_session, "Q183", "Germany", ["Germany", "Deutschland"]
        )

        # Add to both tables
        self._register_type(db_session, Location, "Q183")
        self._register_type(db_session, Country, "Q183")

        query = WikidataEntity.search_index_query()
        results = db_session.execute(query).fetchall()

        # One row per entity, even though it appears in two type tables
        assert len(results) == 1
        row = results[0]
        assert row.wikidata_id == "Q183"
        assert "Location" in row.types
        assert "Country" in row.types
        assert set(row.labels) == {"Germany", "Deutschland"}

    def test_returns_multiple_entities(self, db_session):
        """Test query returns multiple entities."""
        self._create_location(db_session, "Q60", "New York City", ["NYC"])
        self._create_position(db_session, "Q30185", "Mayor", ["Mayor", "Bürgermeister"])

        query = WikidataEntity.search_index_query()
        results = db_session.execute(query).fetchall()

        assert len(results) == 2
        # Index by id so the assertions do not depend on row order
        results_by_id = {r.wikidata_id: r for r in results}

        assert "Location" in results_by_id["Q60"].types
        assert set(results_by_id["Q60"].labels) == {"NYC"}

        assert "Position" in results_by_id["Q30185"].types
        assert set(results_by_id["Q30185"].labels) == {"Mayor", "Bürgermeister"}

    def test_excludes_soft_deleted_entities(self, db_session):
        """Test query excludes soft-deleted entities."""
        from datetime import datetime, timezone

        self._create_location(db_session, "Q60", "New York City", ["NYC"])
        self._create_location(db_session, "Q84", "London", ["London"])

        # Soft-delete London
        db_session.execute(
            WikidataEntity.__table__.update()
            .where(WikidataEntity.wikidata_id == "Q84")
            .values(deleted_at=datetime.now(timezone.utc))
        )
        db_session.flush()

        query = WikidataEntity.search_index_query()
        results = db_session.execute(query).fetchall()

        assert len(results) == 1
        assert results[0].wikidata_id == "Q60"

    def test_excludes_entities_not_in_model_tables(self, db_session):
        """Test query only returns entities that exist in model tables."""
        # Create entity with labels but NOT in any model table
        self._create_entity_with_labels(db_session, "Q999", "Orphan Entity", ["Orphan"])

        # Create proper location
        self._create_location(db_session, "Q60", "New York City", ["NYC"])

        query = WikidataEntity.search_index_query()
        results = db_session.execute(query).fetchall()

        # Only the location should be returned
        assert len(results) == 1
        assert results[0].wikidata_id == "Q60"

    def test_pagination_with_offset_and_limit(self, db_session):
        """Test query supports pagination with offset and limit."""
        # Create multiple locations
        for i in range(5):
            self._create_location(db_session, f"Q{i}", f"Location {i}", [f"Label {i}"])

        query = WikidataEntity.search_index_query()

        # Get first 2
        results_page1 = db_session.execute(query.limit(2)).fetchall()
        assert len(results_page1) == 2

        # Get next 2
        results_page2 = db_session.execute(query.offset(2).limit(2)).fetchall()
        assert len(results_page2) == 2

        # Verify no overlap
        # NOTE(review): disjoint pages are only guaranteed if
        # search_index_query() applies a deterministic ORDER BY - confirm.
        page1_ids = {r.wikidata_id for r in results_page1}
        page2_ids = {r.wikidata_id for r in results_page2}
        assert page1_ids.isdisjoint(page2_ids)
0 commit comments