|
1 | 1 | """Tests for WikidataEntity model.""" |
2 | 2 |
|
| 3 | +from unittest.mock import patch, Mock |
| 4 | + |
3 | 5 | from sqlalchemy.dialects.postgresql import insert |
4 | 6 |
|
5 | 7 | from poliloom.models import WikidataEntity, WikidataRelation, RelationType |
@@ -856,3 +858,151 @@ def test_entity_with_subclass_relation_to_hierarchy(self, db_session): |
856 | 858 | # Position should remain |
857 | 859 | count = db_session.execute(text("SELECT COUNT(*) FROM positions")).scalar() |
858 | 860 | assert count == 1 |
| 861 | + |
| 862 | + |
class TestCleanupOutsideHierarchySearchService:
    """Check that cleanup_outside_hierarchy forwards deletions to the search service.

    Uses Location because it has SearchIndexedMixin (Position does not).
    """

    def _insert_entity_and_location(self, db_session, location_id, name):
        """Insert a WikidataEntity row and a matching Location row.

        Both inserts skip rows whose wikidata_id already exists. The caller
        is responsible for flushing the session.
        """
        from poliloom.models import Location

        entity_insert = (
            insert(WikidataEntity)
            .values([{"wikidata_id": location_id, "name": name}])
            .on_conflict_do_nothing(index_elements=["wikidata_id"])
        )
        db_session.execute(entity_insert)

        location_insert = (
            insert(Location.__table__)
            .values([{"wikidata_id": location_id}])
            .on_conflict_do_nothing(index_elements=["wikidata_id"])
        )
        db_session.execute(location_insert)

    def _run_cleanup(self, db_session, dry_run):
        """Run Location.cleanup_outside_hierarchy with SearchService patched.

        Returns a ``(stats, search_stub)`` pair, where ``search_stub`` is the
        Mock handed back whenever the patched SearchService is instantiated.
        """
        from poliloom.models import Location

        search_stub = Mock()
        # SearchService is imported locally in cleanup_outside_hierarchy, so
        # the name is patched on the poliloom.search module itself.
        with patch("poliloom.search.SearchService", return_value=search_stub):
            stats = Location.cleanup_outside_hierarchy(db_session, dry_run=dry_run)
        return stats, search_stub

    def _create_hierarchy(self, db_session, root_id, child_ids):
        """Create a root class entity plus children linked to it via SUBCLASS_OF."""
        # One entity row for the root, followed by one per child.
        entity_rows = [{"wikidata_id": root_id, "name": f"Root {root_id}"}]
        entity_rows.extend(
            {"wikidata_id": child_id, "name": f"Child {child_id}"}
            for child_id in child_ids
        )
        db_session.execute(
            insert(WikidataEntity)
            .values(entity_rows)
            .on_conflict_do_nothing(index_elements=["wikidata_id"])
        )

        # Each child becomes a subclass of the root.
        subclass_rows = [
            {
                "parent_entity_id": root_id,
                "child_entity_id": child_id,
                "relation_type": RelationType.SUBCLASS_OF,
                "statement_id": f"{child_id}$subclass-of-{root_id}",
            }
            for child_id in child_ids
        ]
        # Skip the relation insert entirely when there are no children.
        if subclass_rows:
            db_session.execute(
                insert(WikidataRelation)
                .values(subclass_rows)
                .on_conflict_do_nothing(index_elements=["statement_id"])
            )

        db_session.flush()

    def _create_location_in_hierarchy(self, db_session, location_id, class_id):
        """Create a location that is an INSTANCE_OF the given hierarchy class."""
        self._insert_entity_and_location(
            db_session, location_id, f"Location {location_id}"
        )

        # Tie the location to its class so it counts as inside the hierarchy.
        instance_of_row = {
            "parent_entity_id": class_id,
            "child_entity_id": location_id,
            "relation_type": RelationType.INSTANCE_OF,
            "statement_id": f"{location_id}$instance-of-{class_id}",
        }
        db_session.execute(
            insert(WikidataRelation)
            .values([instance_of_row])
            .on_conflict_do_nothing(index_elements=["statement_id"])
        )
        db_session.flush()

    def _create_orphan_location(self, db_session, location_id):
        """Create a location that has no relations into any hierarchy."""
        self._insert_entity_and_location(
            db_session, location_id, f"Orphan Location {location_id}"
        )
        db_session.flush()

    def test_cleanup_calls_delete_documents(self, db_session):
        """Test that cleanup_outside_hierarchy calls delete_documents on search service."""
        # Q486972 is "human settlement" in Location._hierarchy_roots
        self._create_hierarchy(db_session, "Q486972", ["Q100"])

        # One location inside the hierarchy, two orphans outside of it.
        self._create_location_in_hierarchy(db_session, "Q200", "Q100")
        for orphan_id in ("Q300", "Q301"):
            self._create_orphan_location(db_session, orphan_id)

        stats, search_stub = self._run_cleanup(db_session, dry_run=False)

        # Exactly the two orphans are removed and reported to the search service.
        assert stats["entities_removed"] == 2
        search_stub.delete_documents.assert_called_once()
        positional_args, _ = search_stub.delete_documents.call_args
        assert set(positional_args[0]) == {"Q300", "Q301"}

    def test_cleanup_does_not_call_delete_when_nothing_removed(self, db_session):
        """Test that delete_documents is not called when no entities are removed."""
        # Hierarchy with a single valid location and no orphans.
        self._create_hierarchy(db_session, "Q486972", ["Q100"])
        self._create_location_in_hierarchy(db_session, "Q200", "Q100")

        stats, search_stub = self._run_cleanup(db_session, dry_run=False)

        # Nothing was removed, so the search service must stay untouched.
        assert stats["entities_removed"] == 0
        search_stub.delete_documents.assert_not_called()

    def test_dry_run_does_not_call_delete_documents(self, db_session):
        """Test that dry_run=True does not call delete_documents."""
        # Hierarchy plus a single orphan that a real run would remove.
        self._create_hierarchy(db_session, "Q486972", ["Q100"])
        self._create_orphan_location(db_session, "Q300")

        stats, search_stub = self._run_cleanup(db_session, dry_run=True)

        # The dry run reports the would-be removal without calling delete_documents.
        assert stats["entities_removed"] == 1
        search_stub.delete_documents.assert_not_called()
0 commit comments