diff --git a/bin/scripts/fixes/italy_dedup_flagged_pairs.py b/bin/scripts/fixes/italy_dedup_flagged_pairs.py new file mode 100644 index 000000000..787baa952 --- /dev/null +++ b/bin/scripts/fixes/italy_dedup_flagged_pairs.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +"""Drop the legacy half of the 6 unambiguous duplicate pairs flagged by the +issue #1349 city remap. Two ambiguous pairs (Sermide/Felonica and +Corteolona/Genzone) are intentionally left for maintainer review — neither +of their records carries the modern ISTAT-canonical merged name. + +Mapping (id_to_drop -> reason): + 58976 'Pozzaglio' -> kept 'Pozzaglio ed Uniti' (id 58977, ISTAT canonical, Q42226) + 139523 'Limite' -> kept 'Capraia e Limite' (id 136799, ISTAT canonical, Q82639) + 140714 'Napoli' -> kept 'Naples' (id 140713, English name = repo convention) + 139215 'Inverno' -> kept 'Inverno e Monteleone' (id 139216, ISTAT canonical, Q39917) + 61530 'Trinità d'Agultu' -> kept 'Trinità d'Agultu e Vignola' (id 61531, ISTAT canonical, Q341096) + 61329 'Torino' -> kept 'Turin' (id 61575, English name = repo convention) + +Idempotent: re-running on already-deduplicated data produces 0 changes.
def _precondition_errors(cities: List[dict], table: dict) -> List[str]:
    """Return one message per violated precondition; an empty list means safe.

    For every entry of *table* (``drop_id -> (drop_name, keep_id, keep_name,
    reason)``) whose drop id is still present in *cities*:

    * every record carrying the drop id must have the expected name, and
    * the keep record must exist and have the expected name.

    Entries whose drop id is absent are treated as already deduplicated and
    produce no error.
    """
    by_id = {c["id"]: c for c in cities}
    errors: List[str] = []
    for drop_id, (drop_name, keep_id, keep_name, _reason) in table.items():
        # Check *all* records with this id: the deletion pass removes every
        # one of them, so verifying only the last occurrence would let an
        # unexpected duplicate-id record be deleted unchecked.
        targets = [c for c in cities if c["id"] == drop_id]
        if not targets:
            # Missing drop target = already deduped; idempotent skip.
            continue
        for target in targets:
            if target.get("name") != drop_name:
                errors.append(f"id={drop_id} expected name={drop_name!r}, found {target.get('name')!r}")
        keeper = by_id.get(keep_id)
        if keeper is None:
            errors.append(f"keep target id={keep_id} (would-be parent of dropped id={drop_id}) is missing")
        elif keeper.get("name") != keep_name:
            errors.append(f"keep target id={keep_id} expected name={keep_name!r}, found {keeper.get('name')!r}")
    return errors


def main(
    argv: Optional[List[str]] = None,
    *,
    cities_json: Optional[Path] = None,
    drop_table: Optional[dict] = None,
) -> int:
    """Remove the flagged duplicate city records from the cities JSON file.

    Args:
        argv: Command-line arguments (without the program name). ``None``
            lets argparse read ``sys.argv``. Supports ``--dry-run``.
        cities_json: Optional path of the JSON file to process. Defaults to
            the module-level ``CITIES_JSON``; intended for running the script
            against a copy or fixture.
        drop_table: Optional replacement for the module-level ``DROP`` table,
            in the same ``id -> (name, kept-id, kept-name, reason)`` shape.

    Returns:
        ``0`` on success (including dry runs and runs with nothing to drop),
        ``2`` when a precondition fails; in that case the file is not touched.
    """
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # argparse's default of no description instead of crashing on None.split.
    summary = (__doc__ or "").split("\n", 1)[0] or None
    parser = argparse.ArgumentParser(description=summary)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args(argv)

    # Resolve the defaults lazily so explicit overrides never touch the
    # module-level constants.
    source = CITIES_JSON if cities_json is None else Path(cities_json)
    table = DROP if drop_table is None else drop_table

    cities = json.loads(source.read_text(encoding="utf-8"))

    # Verify preconditions before touching anything: these are irreversible
    # deletions, so refuse to mutate on any mismatch.
    errors = _precondition_errors(cities, table)
    if errors:
        print("Preconditions failed; refusing to mutate IT.json:", file=sys.stderr)
        for e in errors:
            print(f" - {e}", file=sys.stderr)
        return 2

    dropped: List[dict] = [
        {
            "id": c["id"],
            "name": c["name"],
            "kept_id": table[c["id"]][1],
            "kept_name": table[c["id"]][2],
            "reason": table[c["id"]][3],
        }
        for c in cities
        if c["id"] in table
    ]
    new_cities: List[dict] = [c for c in cities if c["id"] not in table]

    # Drop ids that matched no record were removed by an earlier run.
    dropped_ids = {d["id"] for d in dropped}
    skipped_already_gone: List[int] = [i for i in table if i not in dropped_ids]

    print(f"Input cities: {len(cities)}")
    print(f"Dropped this run: {len(dropped)}")
    print(f"Already gone (idempotent skip): {len(skipped_already_gone)}")
    print(f"Output cities: {len(new_cities)}")
    print()
    print("Drops:")
    for d in dropped:
        print(f" id={d['id']:7} {d['name']!r:35} -> kept id={d['kept_id']} ({d['kept_name']!r}) [{d['reason']}]")

    if args.dry_run:
        print("\n--dry-run: IT.json not modified.")
        return 0

    if not dropped:
        print("\nNothing to drop; IT.json untouched.")
        return 0

    text = json.dumps(new_cities, ensure_ascii=False, indent=2)
    # Write to a sibling file and rename it over the original so that an
    # interrupted run cannot leave a truncated data file behind.
    staging = source.with_name(source.name + ".tmp")
    staging.write_text(text, encoding="utf-8")
    staging.replace(source)

    # The repo-relative form only makes sense for the default in-repo file.
    shown = source.relative_to(REPO_ROOT) if cities_json is None else source
    print(f"\nWrote {len(new_cities)} cities to {shown}")
    return 0
58976, - "name": "Pozzaglio", - "state_id": 1751, - "state_code": "CR", - "country_id": 107, - "country_code": "IT", - "type": "adm3", - "level": null, - "parent_id": null, - "latitude": "45.19999000", - "longitude": "10.05345000", - "native": "Pozzaglio", - "population": 894, - "timezone": "Europe/Rome", - "translations": { - "br": "Pozzaglio", - "ko": "포차글리오", - "pt-BR": "Pozzaglio", - "pt": "Pozzaglio", - "nl": "Pozzaglio", - "hr": "Pozzaglio", - "fa": "پوززالیو", - "de": "Pozzaglio", - "es": "Pozzaglio", - "fr": "Pozzaglio", - "ja": "ポッツァリオ", - "it": "Pozzaglio", - "zh-CN": "波扎利奥", - "tr": "Pozzaglio", - "ru": "Поццальо", - "uk": "Поццальйо", - "pl": "Pozzaglio", - "hi": "पॉज़ाग्लियो", - "ar": "بوتزاليو" - }, - "created_at": "2019-10-06T10:06:45", - "updated_at": "2025-12-02T14:38:56", - "flag": 1, - "wikiDataId": "Q18488587" - }, { "id": 58977, "name": "Pozzaglio ed Uniti", @@ -128206,47 +128165,6 @@ "flag": 1, "wikiDataId": "Q35504" }, - { - "id": 61329, - "name": "Torino", - "state_id": 5637, - "state_code": "TO", - "country_id": 107, - "country_code": "IT", - "type": "adm3", - "level": null, - "parent_id": null, - "latitude": "45.13333000", - "longitude": "7.36667000", - "native": "Torino", - "population": null, - "timezone": "Europe/Rome", - "translations": { - "br": "Torino", - "ko": "토리노", - "pt-BR": "Turim", - "pt": "Turim", - "nl": "Turijn", - "hr": "Torino", - "fa": "تورینو", - "de": "Turin", - "es": "Turín", - "fr": "Turin", - "ja": "トリノ", - "it": "Torino", - "zh-CN": "都灵", - "tr": "Torino", - "ru": "Турин", - "uk": "Турин", - "pl": "Turyn", - "hi": "टोरिनो", - "ar": "تورينو" - }, - "created_at": "2019-10-06T10:07:16", - "updated_at": "2025-12-11T20:19:20", - "flag": 1, - "wikiDataId": "Q495" - }, { "id": 61330, "name": "Torino di Sangro", @@ -136447,47 +136365,6 @@ "flag": 1, "wikiDataId": "Q20520" }, - { - "id": 61530, - "name": "Trinità d'Agultu", - "state_id": 1722, - "state_code": "SS", - "country_id": 107, - "country_code": "IT", - "type": 
"adm3", - "level": null, - "parent_id": null, - "latitude": "40.98589000", - "longitude": "8.91377000", - "native": "Trinità di Agult", - "population": 1287, - "timezone": "Europe/Rome", - "translations": { - "br": "Trinità d'Agultu", - "ko": "트리니타 다굴투", - "pt-BR": "Trindade de Agultu", - "pt": "Trindade de Agultu", - "nl": "Drie-eenheid van Agultu", - "hr": "Trojstvo Agultu", - "fa": "ترینیتا د آگولتو", - "de": "Trinità d'Agultu", - "es": "Trinidad de Agultu", - "fr": "Trinité d'Agultu", - "ja": "トリニタ・ダグルトゥ", - "it": "Trinità d'Agultu", - "zh-CN": "阿古尔图的圣三一教堂", - "tr": "Trinità d'Agultu", - "ru": "Trinità d'Agultu", - "uk": "Трініта д'Агульту", - "pl": "Trójca Agultu", - "hi": "ट्रिनिटा डी'अगुल्टु", - "ar": "ترينيتا داغولتو" - }, - "created_at": "2019-10-06T10:07:21", - "updated_at": "2025-12-11T20:19:11", - "flag": 1, - "wikiDataId": null - }, { "id": 61531, "name": "Trinità d'Agultu e Vignola", @@ -330705,47 +330582,6 @@ "flag": 1, "wikiDataId": "Q47645" }, - { - "id": 139215, - "name": "Inverno", - "state_id": 1676, - "state_code": "PV", - "country_id": 107, - "country_code": "IT", - "type": "adm3", - "level": null, - "parent_id": null, - "latitude": "45.19859000", - "longitude": "9.38429000", - "native": "Inverno", - "population": 647, - "timezone": "Europe/Rome", - "translations": { - "br": "Inverno", - "ko": "인베르노", - "pt-BR": "Inverno", - "pt": "Inverno", - "nl": "Winter", - "hr": "Zima", - "fa": "اینورنو", - "de": "Winter", - "es": "Invierno", - "fr": "Hiver", - "ja": "インヴェルノ", - "it": "Inverno", - "zh-CN": "因弗诺", - "tr": "Inverno", - "ru": "Инверно", - "uk": "Інверно", - "pl": "Inverno", - "hi": "इनवर्नो", - "ar": "إنفيرنو" - }, - "created_at": "2019-10-06T10:39:42", - "updated_at": "2025-12-02T14:38:56", - "flag": 1, - "wikiDataId": "Q18492895" - }, { "id": 139216, "name": "Inverno e Monteleone", @@ -343333,47 +343169,6 @@ "flag": 1, "wikiDataId": "Q491839" }, - { - "id": 139523, - "name": "Limite", - "state_id": 5630, - "state_code": "FI", - 
"country_id": 107, - "country_code": "IT", - "type": "adm3", - "level": null, - "parent_id": null, - "latitude": "43.74450000", - "longitude": "10.97996000", - "native": "Limite", - "population": 4530, - "timezone": "Europe/Rome", - "translations": { - "br": "Bevennet", - "ko": "리미트", - "pt-BR": "Limite", - "pt": "Limite", - "nl": "Beperkt", - "hr": "Ograničeno", - "fa": "محدود", - "de": "Limite", - "es": "Limite", - "fr": "Limite", - "ja": "リミテ", - "it": "Limite", - "zh-CN": "限制", - "tr": "Sınırlı", - "ru": "Лимит", - "uk": "Ліміте", - "pl": "Limite", - "hi": "सीमित", - "ar": "محدود" - }, - "created_at": "2019-10-06T10:39:45", - "updated_at": "2025-12-02T14:38:56", - "flag": 1, - "wikiDataId": "Q18487060" - }, { "id": 139524, "name": "Limiti di Greccio", @@ -392164,47 +391959,6 @@ "flag": 1, "wikiDataId": "Q2634" }, - { - "id": 140714, - "name": "Napoli", - "state_id": 5634, - "state_code": "NA", - "country_id": 107, - "country_code": "IT", - "type": "adm3", - "level": null, - "parent_id": null, - "latitude": "40.88333000", - "longitude": "14.41667000", - "native": "Napoli", - "population": null, - "timezone": "Europe/Rome", - "translations": { - "br": "Napoli", - "ko": "나폴리", - "pt-BR": "Nápoles", - "pt": "Nápoles", - "nl": "Napels", - "hr": "Napoli", - "fa": "ناپولی", - "de": "Neapel", - "es": "Nápoles", - "fr": "Naples", - "ja": "ナポリ", - "it": "Napoli", - "zh-CN": "那不勒斯", - "tr": "Napoli", - "ru": "Наполи", - "uk": "Наполі", - "pl": "Neapol", - "hi": "नपोली", - "ar": "نابولي" - }, - "created_at": "2019-10-06T10:40:02", - "updated_at": "2025-12-11T20:19:31", - "flag": 1, - "wikiDataId": "Q2634" - }, { "id": 140715, "name": "Narbolia",