From efc4380deb933309ff96a45b596799eae04486b3 Mon Sep 17 00:00:00 2001 From: Marko Haarni Date: Fri, 17 Apr 2026 09:23:00 +0300 Subject: [PATCH 1/2] feat: IO-848 Add migration to backfill missing project types and type qualifiers for projects Project types and type qualifiers are backfilled based on project class and only if they do not exist on project. --- ...fill_missing_project_type_and_qualifier.py | 378 ++++++++++++++++++ 1 file changed, 378 insertions(+) create mode 100644 infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py diff --git a/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py b/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py new file mode 100644 index 00000000..3ce21b09 --- /dev/null +++ b/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py @@ -0,0 +1,378 @@ +from django.db import migrations +from django.db.models import Q +import re + + +NEW_CONSTRUCTION_TYPE = "newConstruction" +BASIC_IMPROVEMENT_TYPE = "basicImprovement" + +STREET_QUALIFIER = "street" +PARK_QUALIFIER = "park" +PRE_CONSTRUCTION_QUALIFIER = "preConstruction" +SPORTS_QUALIFIER = "sports" +SPECIALTY_STRUCTURES_QUALIFIER = "spesialtyStructures" + + +# Mirrors ProjectClassSerializer name transformation behavior for migration-safe matching. +SUFFIX_PATTERN = re.compile( + r",\s*(?:Kylkn|kylkn|Kaupunkiymparistolautakunnan|kaupunkiymparistolautakunnan|" + r"KHN|Khn|khn|Kaupunginhallituksen|kaupunginhallituksen)\s+kaytettavaksi" +) +NUMBERING_PATTERN = re.compile(r"^(\d+\s+\d+(?:\s+\d+){0,2})") +LEADING_CODE_PATTERN = re.compile(r"^(?:\d+\s*){1,4}\s*") + + +# Number + name groups for explicit matching. +NEW_CONSTRUCTION_CLASS_KEYS = { + "8010301 muu esirakentaminen", + "8030101 uudisrakentaminen", + "8040101 uudet puistot", + "8040102 uudet liikuntapaikat ja ulkoilualueet", + "808 projektialueiden infrarakentaminen", + "8090102 kadut uudisrakentaminen", + "8090103 puistot uudisrakentaminen", + "8090202 kadut uudisrakentaminen", + "8090203 puistot uudisrakentaminen", + "8090302 kadut uudisrakentaminen", + "8090303 puistot uudisrakentaminen", + "8090402 kadut uudisrakentaminen", + "8090403 puistot uudisrakentaminen", + "810 suuret liikennehankkeet", +} + +STREET_CLASS_KEYS = { + "803 kadut ja liikennevaylat", + "8030103 muut investoinnit", + "80802 kadut", + "8090102 kadut uudisrakentaminen", + "8090102 kadut peruskorjaus", + "8090202 kadut uudisrakentaminen", + "8090202 kadut peruskorjaus", + "8090302 kadut uudisrakentaminen", + "8090302 kadut peruskorjaus", + "8090402 kadut uudisrakentaminen", + "8090402 kadut peruskorjaus", + "8100103 liittyvat kadut ja liikennevaylat", + "81003 sornaisten tunneli", + "8100402 liittyvat kadut ja liikennevaylat", +} + +PARK_CLASS_KEYS = { + "8040101 uudet puistot", + "8040101 puistojen peruskorjaus", + "80803 puistot ja liikunta-alueet", + "8090103 puistot uudisrakentaminen", + "8090103 puistot peruskorjaus", + "8090203 puistot uudisrakentaminen", + "8090203 puistot peruskorjaus", + "8090303 puistot uudisrakentaminen", + "8090303 puistot peruskorjaus", + "8090403 puistot uudisrakentaminen", + "8090403 puistot peruskorjaus", +} + +PRE_CONSTRUCTION_CLASS_KEYS = { + "801 kiintea omaisuus", + "80801 esirakentaminen", + "8090101 esirakentaminen", + "8090201 esirakentaminen", + "8090301 esirakentaminen", + "8090401 esirakentaminen", + "8100102 liittyva esirakentaminen", + "8100401 liittyva esirakentaminen", +} + +SPORTS_CLASS_KEYS = { + "8040102 uudet liikuntapaikat ja ulkoilualueet", + "8040102 liikunta- ja ulkoilualueiden peruskorjaus", + "8090103 liikunta-alueet", + "8090203 liikunta-alueet", + "8090303 liikunta-alueet", + "8090403 liikunta-alueet", +} + + +def normalize_text(value): + if not value: + return "" + normalized = " ".join(str(value).strip().lower().split()) + normalized = normalized.replace("ä", "a").replace("ö", "o") + return normalized + + +def clean_class_name(name): + if not name: + return "" + return SUFFIX_PATTERN.sub("", str(name)).strip() + + +def normalize_display_name_for_match(name): + clean_name = clean_class_name(name) + without_numbering = LEADING_CODE_PATTERN.sub("", clean_name).strip() + normalized = normalize_text(without_numbering) + normalized = normalized.replace(" - ", "-") + return normalized + + +def build_number_name_key(value): + if not value: + return None + + text = str(value).strip() + if not text: + return None + + digit_match = re.search(r"[0-9][0-9 ]+", text) + if not digit_match: + return None + + digits = re.sub(r"\D", "", digit_match.group(0)) + if not digits: + return None + + name_part = normalize_display_name_for_match(text) + if not name_part: + return None + + return f"{digits} {name_part}" + + +def extract_numbering(name): + clean_name = clean_class_name(name) + match = NUMBERING_PATTERN.match(clean_name) + return match.group(1) if match else None + + +def get_coordinator_numbering(project_class): + try: + coordinator = project_class.coordinatorClass + except Exception: + return None + + for _ in range(10): + if not coordinator: + break + numbering = extract_numbering(getattr(coordinator, "name", "")) + if numbering: + return numbering + coordinator = getattr(coordinator, "parent", None) + + return None + + +def find_ancestor_numbering(project_class): + parent = getattr(project_class, "parent", None) + for _ in range(10): + if not parent or getattr(parent, "forCoordinatorOnly", False): + break + + parent_raw_numbering = extract_numbering(getattr(parent, "name", "")) + parent_coordinator_numbering = get_coordinator_numbering(parent) + + if ( + parent_coordinator_numbering + and parent_raw_numbering != parent_coordinator_numbering + ): + return parent_coordinator_numbering + + parent = getattr(parent, "parent", None) + + return None + + +def is_more_specific(child_numbering, ancestor_numbering): + return child_numbering.startswith(ancestor_numbering) and len(child_numbering) > len( + ancestor_numbering + ) + + +def get_numbering_to_apply(project_class): + class_name = normalize_text(getattr(project_class, "name", "")) + # Keep numbering behavior aligned with ProjectClassSerializer display rules. + if getattr(project_class, "forCoordinatorOnly", False) or "suurpiiri" in class_name: + return None + + my_numbering = get_coordinator_numbering(project_class) + if not my_numbering: + return None + + ancestor_numbering = find_ancestor_numbering(project_class) + if not ancestor_numbering: + return my_numbering + + if is_more_specific(my_numbering, ancestor_numbering): + return my_numbering + + return None + + +def get_class_display_name(project_class): + base_name = clean_class_name(getattr(project_class, "name", "")) + numbering = get_numbering_to_apply(project_class) + if not numbering: + return base_name + + name_without_numbering = NUMBERING_PATTERN.sub("", base_name).strip() + return f"{numbering} {name_without_numbering}".strip() + + +def iter_class_hierarchy(project_class, max_depth=10): + # Traverse selected class and parents with cycle/depth guards for migration safety. + current = project_class + seen = set() + + for _ in range(max_depth): + if not current: + break + + class_id = getattr(current, "id", None) + if class_id is not None: + if class_id in seen: + break + seen.add(class_id) + + yield current + current = getattr(current, "parent", None) + + +def get_number_name_keys(project_class): + if not project_class: + return set() + + # Build a lookup key set from the direct class and all ancestors so inherited + # classifications can still resolve type/qualifier values. + keys = set() + for current_class in iter_class_hierarchy(project_class): + display_name = get_class_display_name(current_class) + raw_name = getattr(current_class, "name", "") + path = getattr(current_class, "path", "") + + for candidate in [display_name, raw_name]: + key = build_number_name_key(candidate) + if key: + keys.add(key) + + if path: + for segment in str(path).split("/"): + key = build_number_name_key(segment) + if key: + keys.add(key) + + return keys + + +def resolve_project_type_value(project_class): + if not project_class: + return BASIC_IMPROVEMENT_TYPE + + number_name_keys = get_number_name_keys(project_class) + if number_name_keys.intersection(NEW_CONSTRUCTION_CLASS_KEYS): + return NEW_CONSTRUCTION_TYPE + + return BASIC_IMPROVEMENT_TYPE + + +def resolve_project_type_qualifier_value(project_class): + if not project_class: + return None + + number_name_keys = get_number_name_keys(project_class) + raw_name = normalize_text(getattr(project_class, "name", "")) + + # Name-based exceptional mapping for special structures. + if "siltojen peruskorjaus ja uusiminen" in raw_name: + return SPECIALTY_STRUCTURES_QUALIFIER + if "tulvasuojelu ja rantarakenteet" in raw_name: + return SPECIALTY_STRUCTURES_QUALIFIER + + if number_name_keys.intersection(STREET_CLASS_KEYS): + return STREET_QUALIFIER + + if number_name_keys.intersection(PRE_CONSTRUCTION_CLASS_KEYS): + return PRE_CONSTRUCTION_QUALIFIER + + if number_name_keys.intersection(SPORTS_CLASS_KEYS): + return SPORTS_QUALIFIER + + if number_name_keys.intersection(PARK_CLASS_KEYS): + return PARK_QUALIFIER + + return None + + +def backfill_missing_project_type_and_qualifier(apps, schema_editor): + Project = apps.get_model("infraohjelmointi_api", "Project") + ProjectType = apps.get_model("infraohjelmointi_api", "ProjectType") + ProjectTypeQualifier = apps.get_model("infraohjelmointi_api", "ProjectTypeQualifier") + + project_type_ids = dict( + ProjectType.objects.filter( + value__in=[NEW_CONSTRUCTION_TYPE, BASIC_IMPROVEMENT_TYPE] + ).values_list("value", "id") + ) + + required_types = [NEW_CONSTRUCTION_TYPE, BASIC_IMPROVEMENT_TYPE] + missing_types = [value for value in required_types if value not in project_type_ids] + if missing_types: + raise RuntimeError( + f"Required ProjectType values missing for migration: {', '.join(missing_types)}" + ) + + qualifier_values = [ + STREET_QUALIFIER, + PARK_QUALIFIER, + PRE_CONSTRUCTION_QUALIFIER, + SPORTS_QUALIFIER, + SPECIALTY_STRUCTURES_QUALIFIER, + ] + qualifier_ids = dict( + ProjectTypeQualifier.objects.filter(value__in=qualifier_values).values_list( + "value", "id" + ) + ) + + pending_updates = [] + # Only touch rows that still have missing target fields. + queryset = Project.objects.filter( + Q(type__isnull=True) | Q(typeQualifier__isnull=True) + ).select_related("projectClass") + + for project in queryset.iterator(chunk_size=1000): + changed = False + + if project.type_id is None: + type_value = resolve_project_type_value(project.projectClass) + project.type_id = project_type_ids[type_value] + changed = True + + if project.typeQualifier_id is None: + qualifier_value = resolve_project_type_qualifier_value(project.projectClass) + qualifier_id = qualifier_ids.get(qualifier_value) + if qualifier_id: + project.typeQualifier_id = qualifier_id + changed = True + + if changed: + pending_updates.append(project) + + if len(pending_updates) >= 1000: + # Batch updates to keep memory use and transaction overhead bounded. + Project.objects.bulk_update(pending_updates, ["type", "typeQualifier"]) + pending_updates = [] + + if pending_updates: + Project.objects.bulk_update(pending_updates, ["type", "typeQualifier"]) + + +class Migration(migrations.Migration): + + dependencies = [ + ("infraohjelmointi_api", "0098_merge_20260415_1417"), + ] + + operations = [ + migrations.RunPython( + backfill_missing_project_type_and_qualifier, + migrations.RunPython.noop, + ), + ] From ee0b278f17e9667b2debbea770c8d4a8c421c7a5 Mon Sep 17 00:00:00 2001 From: Marko Haarni Date: Wed, 22 Apr 2026 12:16:42 +0300 Subject: [PATCH 2/2] fix: IO-848 Fix SUFFIX_PATTERN to use finnish characters --- .../0099_backfill_missing_project_type_and_qualifier.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py b/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py index 3ce21b09..abddc326 100644 --- a/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py +++ b/infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py @@ -15,8 +15,8 @@ # Mirrors ProjectClassSerializer name transformation behavior for migration-safe matching. SUFFIX_PATTERN = re.compile( - r",\s*(?:Kylkn|kylkn|Kaupunkiymparistolautakunnan|kaupunkiymparistolautakunnan|" - r"KHN|Khn|khn|Kaupunginhallituksen|kaupunginhallituksen)\s+kaytettavaksi" + r",\s*(?:Kylkn|kylkn|Kaupunkiympäristölautakunnan|kaupunkiympäristölautakunnan|" + r"KHN|Khn|khn|Kaupunginhallituksen|kaupunginhallituksen)\s+käytettäväksi" ) NUMBERING_PATTERN = re.compile(r"^(\d+\s+\d+(?:\s+\d+){0,2})") LEADING_CODE_PATTERN = re.compile(r"^(?:\d+\s*){1,4}\s*")