-
Notifications
You must be signed in to change notification settings - Fork 3
feat: IO-848 Add migration to backfill missing project types and type qualifiers for projects #429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
markohaarni
wants to merge
2
commits into
develop
Choose a base branch
from
IO-848/bulk-update-migration-for-project-type
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
378 changes: 378 additions & 0 deletions
378
infraohjelmointi_api/migrations/0099_backfill_missing_project_type_and_qualifier.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,378 @@ | ||
| from django.db import migrations | ||
| from django.db.models import Q | ||
| import re | ||
|
|
||
|
|
||
| NEW_CONSTRUCTION_TYPE = "newConstruction" | ||
| BASIC_IMPROVEMENT_TYPE = "basicImprovement" | ||
|
|
||
| STREET_QUALIFIER = "street" | ||
| PARK_QUALIFIER = "park" | ||
| PRE_CONSTRUCTION_QUALIFIER = "preConstruction" | ||
| SPORTS_QUALIFIER = "sports" | ||
| SPECIALTY_STRUCTURES_QUALIFIER = "spesialtyStructures" | ||
|
|
||
|
|
||
| # Mirrors ProjectClassSerializer name transformation behavior for migration-safe matching. | ||
| SUFFIX_PATTERN = re.compile( | ||
| r",\s*(?:Kylkn|kylkn|Kaupunkiympäristölautakunnan|kaupunkiympäristölautakunnan|" | ||
| r"KHN|Khn|khn|Kaupunginhallituksen|kaupunginhallituksen)\s+käytettäväksi" | ||
| ) | ||
| NUMBERING_PATTERN = re.compile(r"^(\d+\s+\d+(?:\s+\d+){0,2})") | ||
| LEADING_CODE_PATTERN = re.compile(r"^(?:\d+\s*){1,4}\s*") | ||
|
|
||
|
|
||
| # Number + name groups for explicit matching. | ||
| NEW_CONSTRUCTION_CLASS_KEYS = { | ||
| "8010301 muu esirakentaminen", | ||
| "8030101 uudisrakentaminen", | ||
| "8040101 uudet puistot", | ||
| "8040102 uudet liikuntapaikat ja ulkoilualueet", | ||
| "808 projektialueiden infrarakentaminen", | ||
| "8090102 kadut uudisrakentaminen", | ||
| "8090103 puistot uudisrakentaminen", | ||
| "8090202 kadut uudisrakentaminen", | ||
| "8090203 puistot uudisrakentaminen", | ||
| "8090302 kadut uudisrakentaminen", | ||
| "8090303 puistot uudisrakentaminen", | ||
| "8090402 kadut uudisrakentaminen", | ||
| "8090403 puistot uudisrakentaminen", | ||
| "810 suuret liikennehankkeet", | ||
| } | ||
|
|
||
| STREET_CLASS_KEYS = { | ||
| "803 kadut ja liikennevaylat", | ||
| "8030103 muut investoinnit", | ||
| "80802 kadut", | ||
| "8090102 kadut uudisrakentaminen", | ||
| "8090102 kadut peruskorjaus", | ||
| "8090202 kadut uudisrakentaminen", | ||
| "8090202 kadut peruskorjaus", | ||
| "8090302 kadut uudisrakentaminen", | ||
| "8090302 kadut peruskorjaus", | ||
| "8090402 kadut uudisrakentaminen", | ||
| "8090402 kadut peruskorjaus", | ||
| "8100103 liittyvat kadut ja liikennevaylat", | ||
| "81003 sornaisten tunneli", | ||
| "8100402 liittyvat kadut ja liikennevaylat", | ||
| } | ||
|
|
||
| PARK_CLASS_KEYS = { | ||
| "8040101 uudet puistot", | ||
| "8040101 puistojen peruskorjaus", | ||
| "80803 puistot ja liikunta-alueet", | ||
| "8090103 puistot uudisrakentaminen", | ||
| "8090103 puistot peruskorjaus", | ||
| "8090203 puistot uudisrakentaminen", | ||
| "8090203 puistot peruskorjaus", | ||
| "8090303 puistot uudisrakentaminen", | ||
| "8090303 puistot peruskorjaus", | ||
| "8090403 puistot uudisrakentaminen", | ||
| "8090403 puistot peruskorjaus", | ||
| } | ||
|
|
||
| PRE_CONSTRUCTION_CLASS_KEYS = { | ||
| "801 kiintea omaisuus", | ||
| "80801 esirakentaminen", | ||
| "8090101 esirakentaminen", | ||
| "8090201 esirakentaminen", | ||
| "8090301 esirakentaminen", | ||
| "8090401 esirakentaminen", | ||
| "8100102 liittyva esirakentaminen", | ||
| "8100401 liittyva esirakentaminen", | ||
| } | ||
|
|
||
| SPORTS_CLASS_KEYS = { | ||
| "8040102 uudet liikuntapaikat ja ulkoilualueet", | ||
| "8040102 liikunta- ja ulkoilualueiden peruskorjaus", | ||
| "8090103 liikunta-alueet", | ||
| "8090203 liikunta-alueet", | ||
| "8090303 liikunta-alueet", | ||
| "8090403 liikunta-alueet", | ||
| } | ||
|
|
||
|
|
||
| def normalize_text(value): | ||
| if not value: | ||
| return "" | ||
| normalized = " ".join(str(value).strip().lower().split()) | ||
| normalized = normalized.replace("ä", "a").replace("ö", "o") | ||
| return normalized | ||
|
|
||
|
|
||
| def clean_class_name(name): | ||
| if not name: | ||
| return "" | ||
| return SUFFIX_PATTERN.sub("", str(name)).strip() | ||
|
|
||
|
|
||
| def normalize_display_name_for_match(name): | ||
| clean_name = clean_class_name(name) | ||
| without_numbering = LEADING_CODE_PATTERN.sub("", clean_name).strip() | ||
| normalized = normalize_text(without_numbering) | ||
| normalized = normalized.replace(" - ", "-") | ||
| return normalized | ||
|
|
||
|
|
||
| def build_number_name_key(value): | ||
| if not value: | ||
| return None | ||
|
|
||
| text = str(value).strip() | ||
| if not text: | ||
| return None | ||
|
|
||
| digit_match = re.search(r"[0-9][0-9 ]+", text) | ||
| if not digit_match: | ||
| return None | ||
|
|
||
| digits = re.sub(r"\D", "", digit_match.group(0)) | ||
| if not digits: | ||
| return None | ||
|
|
||
| name_part = normalize_display_name_for_match(text) | ||
| if not name_part: | ||
| return None | ||
|
|
||
| return f"{digits} {name_part}" | ||
|
|
||
|
|
||
| def extract_numbering(name): | ||
| clean_name = clean_class_name(name) | ||
| match = NUMBERING_PATTERN.match(clean_name) | ||
| return match.group(1) if match else None | ||
|
|
||
|
|
||
| def get_coordinator_numbering(project_class): | ||
| try: | ||
| coordinator = project_class.coordinatorClass | ||
| except Exception: | ||
| return None | ||
|
|
||
| for _ in range(10): | ||
| if not coordinator: | ||
| break | ||
| numbering = extract_numbering(getattr(coordinator, "name", "")) | ||
| if numbering: | ||
| return numbering | ||
| coordinator = getattr(coordinator, "parent", None) | ||
|
|
||
| return None | ||
|
|
||
|
|
||
| def find_ancestor_numbering(project_class): | ||
| parent = getattr(project_class, "parent", None) | ||
| for _ in range(10): | ||
| if not parent or getattr(parent, "forCoordinatorOnly", False): | ||
| break | ||
|
|
||
| parent_raw_numbering = extract_numbering(getattr(parent, "name", "")) | ||
| parent_coordinator_numbering = get_coordinator_numbering(parent) | ||
|
|
||
| if ( | ||
| parent_coordinator_numbering | ||
| and parent_raw_numbering != parent_coordinator_numbering | ||
| ): | ||
| return parent_coordinator_numbering | ||
|
|
||
| parent = getattr(parent, "parent", None) | ||
|
|
||
| return None | ||
|
|
||
|
|
||
| def is_more_specific(child_numbering, ancestor_numbering): | ||
| return child_numbering.startswith(ancestor_numbering) and len(child_numbering) > len( | ||
| ancestor_numbering | ||
| ) | ||
|
|
||
|
|
||
| def get_numbering_to_apply(project_class): | ||
| class_name = normalize_text(getattr(project_class, "name", "")) | ||
| # Keep numbering behavior aligned with ProjectClassSerializer display rules. | ||
| if getattr(project_class, "forCoordinatorOnly", False) or "suurpiiri" in class_name: | ||
| return None | ||
|
|
||
| my_numbering = get_coordinator_numbering(project_class) | ||
| if not my_numbering: | ||
| return None | ||
|
|
||
| ancestor_numbering = find_ancestor_numbering(project_class) | ||
| if not ancestor_numbering: | ||
| return my_numbering | ||
|
|
||
| if is_more_specific(my_numbering, ancestor_numbering): | ||
| return my_numbering | ||
|
|
||
| return None | ||
|
|
||
|
|
||
| def get_class_display_name(project_class): | ||
| base_name = clean_class_name(getattr(project_class, "name", "")) | ||
| numbering = get_numbering_to_apply(project_class) | ||
| if not numbering: | ||
| return base_name | ||
|
|
||
| name_without_numbering = NUMBERING_PATTERN.sub("", base_name).strip() | ||
| return f"{numbering} {name_without_numbering}".strip() | ||
|
|
||
|
|
||
| def iter_class_hierarchy(project_class, max_depth=10): | ||
| # Traverse selected class and parents with cycle/depth guards for migration safety. | ||
| current = project_class | ||
| seen = set() | ||
|
|
||
| for _ in range(max_depth): | ||
| if not current: | ||
| break | ||
|
|
||
| class_id = getattr(current, "id", None) | ||
| if class_id is not None: | ||
| if class_id in seen: | ||
| break | ||
| seen.add(class_id) | ||
|
|
||
| yield current | ||
| current = getattr(current, "parent", None) | ||
|
|
||
|
|
||
| def get_number_name_keys(project_class): | ||
| if not project_class: | ||
| return set() | ||
|
|
||
| # Build a lookup key set from the direct class and all ancestors so inherited | ||
| # classifications can still resolve type/qualifier values. | ||
| keys = set() | ||
| for current_class in iter_class_hierarchy(project_class): | ||
| display_name = get_class_display_name(current_class) | ||
| raw_name = getattr(current_class, "name", "") | ||
| path = getattr(current_class, "path", "") | ||
|
|
||
| for candidate in [display_name, raw_name]: | ||
| key = build_number_name_key(candidate) | ||
| if key: | ||
| keys.add(key) | ||
|
|
||
| if path: | ||
| for segment in str(path).split("/"): | ||
| key = build_number_name_key(segment) | ||
| if key: | ||
| keys.add(key) | ||
|
|
||
| return keys | ||
|
|
||
|
|
||
| def resolve_project_type_value(project_class): | ||
| if not project_class: | ||
| return BASIC_IMPROVEMENT_TYPE | ||
|
|
||
| number_name_keys = get_number_name_keys(project_class) | ||
| if number_name_keys.intersection(NEW_CONSTRUCTION_CLASS_KEYS): | ||
| return NEW_CONSTRUCTION_TYPE | ||
|
|
||
| return BASIC_IMPROVEMENT_TYPE | ||
|
|
||
|
|
||
| def resolve_project_type_qualifier_value(project_class): | ||
| if not project_class: | ||
| return None | ||
|
|
||
| number_name_keys = get_number_name_keys(project_class) | ||
| raw_name = normalize_text(getattr(project_class, "name", "")) | ||
|
|
||
| # Name-based exceptional mapping for special structures. | ||
| if "siltojen peruskorjaus ja uusiminen" in raw_name: | ||
| return SPECIALTY_STRUCTURES_QUALIFIER | ||
| if "tulvasuojelu ja rantarakenteet" in raw_name: | ||
| return SPECIALTY_STRUCTURES_QUALIFIER | ||
|
|
||
| if number_name_keys.intersection(STREET_CLASS_KEYS): | ||
| return STREET_QUALIFIER | ||
|
|
||
| if number_name_keys.intersection(PRE_CONSTRUCTION_CLASS_KEYS): | ||
| return PRE_CONSTRUCTION_QUALIFIER | ||
|
|
||
| if number_name_keys.intersection(SPORTS_CLASS_KEYS): | ||
| return SPORTS_QUALIFIER | ||
|
|
||
| if number_name_keys.intersection(PARK_CLASS_KEYS): | ||
| return PARK_QUALIFIER | ||
|
|
||
| return None | ||
|
|
||
|
|
||
| def backfill_missing_project_type_and_qualifier(apps, schema_editor): | ||
| Project = apps.get_model("infraohjelmointi_api", "Project") | ||
| ProjectType = apps.get_model("infraohjelmointi_api", "ProjectType") | ||
| ProjectTypeQualifier = apps.get_model("infraohjelmointi_api", "ProjectTypeQualifier") | ||
|
|
||
| project_type_ids = dict( | ||
| ProjectType.objects.filter( | ||
| value__in=[NEW_CONSTRUCTION_TYPE, BASIC_IMPROVEMENT_TYPE] | ||
| ).values_list("value", "id") | ||
| ) | ||
|
|
||
| required_types = [NEW_CONSTRUCTION_TYPE, BASIC_IMPROVEMENT_TYPE] | ||
| missing_types = [value for value in required_types if value not in project_type_ids] | ||
| if missing_types: | ||
| raise RuntimeError( | ||
| f"Required ProjectType values missing for migration: {', '.join(missing_types)}" | ||
| ) | ||
|
|
||
| qualifier_values = [ | ||
| STREET_QUALIFIER, | ||
| PARK_QUALIFIER, | ||
| PRE_CONSTRUCTION_QUALIFIER, | ||
| SPORTS_QUALIFIER, | ||
| SPECIALTY_STRUCTURES_QUALIFIER, | ||
| ] | ||
| qualifier_ids = dict( | ||
| ProjectTypeQualifier.objects.filter(value__in=qualifier_values).values_list( | ||
| "value", "id" | ||
| ) | ||
| ) | ||
|
|
||
| pending_updates = [] | ||
| # Only touch rows that still have missing target fields. | ||
| queryset = Project.objects.filter( | ||
| Q(type__isnull=True) | Q(typeQualifier__isnull=True) | ||
| ).select_related("projectClass") | ||
|
|
||
| for project in queryset.iterator(chunk_size=1000): | ||
| changed = False | ||
|
|
||
| if project.type_id is None: | ||
| type_value = resolve_project_type_value(project.projectClass) | ||
| project.type_id = project_type_ids[type_value] | ||
| changed = True | ||
|
|
||
| if project.typeQualifier_id is None: | ||
| qualifier_value = resolve_project_type_qualifier_value(project.projectClass) | ||
| qualifier_id = qualifier_ids.get(qualifier_value) | ||
| if qualifier_id: | ||
| project.typeQualifier_id = qualifier_id | ||
| changed = True | ||
|
|
||
| if changed: | ||
| pending_updates.append(project) | ||
|
|
||
| if len(pending_updates) >= 1000: | ||
| # Batch updates to keep memory use and transaction overhead bounded. | ||
| Project.objects.bulk_update(pending_updates, ["type", "typeQualifier"]) | ||
| pending_updates = [] | ||
|
|
||
| if pending_updates: | ||
| Project.objects.bulk_update(pending_updates, ["type", "typeQualifier"]) | ||
|
|
||
|
|
||
| class Migration(migrations.Migration): | ||
|
|
||
| dependencies = [ | ||
| ("infraohjelmointi_api", "0098_merge_20260415_1417"), | ||
| ] | ||
|
|
||
| operations = [ | ||
| migrations.RunPython( | ||
| backfill_missing_project_type_and_qualifier, | ||
| migrations.RunPython.noop, | ||
| ), | ||
| ] | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This pattern should be using finnish characters, like in ProjectClassSerializer.py
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point! Fixed that.