Skip to content

Commit 8169bf6

Browse files
authored
Merge pull request #169 from unicef/staging_deployment
Staging Deployment
2 parents 04eba09 + 97e74c4 commit 8169bf6

File tree

2 files changed

+57
-22
lines changed

2 files changed

+57
-22
lines changed

proco/data_sources/tasks.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -704,20 +704,21 @@ def cleanup_school_master_rows():
704704
if task_instance:
705705
logger.debug('Not found running job for school master cleanup task: {}'.format(task_key))
706706
# Delete all the old records where more than 1 record are in DRAFT/UPDATED_IN_DRAFT or
707-
# ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED for same School GIGA ID
707+
# ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED/ROW_STATUS_DELETED for same School GIGA ID
708708
rows_with_more_than_1_record_in_draft = sources_models.SchoolMasterData.objects.filter(
709709
status__in=[
710710
sources_models.SchoolMasterData.ROW_STATUS_DRAFT,
711711
sources_models.SchoolMasterData.ROW_STATUS_UPDATED_IN_DRAFT,
712712
sources_models.SchoolMasterData.ROW_STATUS_DRAFT_LOCKED,
713713
sources_models.SchoolMasterData.ROW_STATUS_UPDATED_IN_DRAFT_LOCKED,
714+
sources_models.SchoolMasterData.ROW_STATUS_DELETED,
714715
]
715716
).values('school_id_giga', 'country_id').annotate(
716717
total_records=Count('school_id_giga', distinct=False),
717718
).order_by('-total_records', 'school_id_giga', 'country_id').filter(total_records__gt=1)
718719

719720
logger.debug('Queryset to get all the old records to delete where more than 1 record are in DRAFT/'
720-
'UPDATED_IN_DRAFT/ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED '
721+
'UPDATED_IN_DRAFT/ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED/ROW_STATUS_DELETED '
721722
'for same School GIGA ID: {0}'.format(rows_with_more_than_1_record_in_draft.query))
722723

723724
for row in rows_with_more_than_1_record_in_draft:
@@ -727,26 +728,48 @@ def cleanup_school_master_rows():
727728
).order_by('-created')[1:]:
728729
row_to_delete.delete()
729730
task_instance.info('Deleted rows where more than 1 record are in DRAFT/'
730-
'UPDATED_IN_DRAFT/ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED '
731+
'UPDATED_IN_DRAFT/ROW_STATUS_DRAFT_LOCKED/ROW_STATUS_UPDATED_IN_DRAFT_LOCKED/ROW_STATUS_DELETED '
731732
'for same School GIGA ID')
732733

733-
# Delete all the old records where more than 1 record are in is_read=True for same School GIGA ID
734+
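# For every school that has more than 1 PUBLISHED row in the school master table,
# keep only the row with the latest published_at and delete the older ones.
# NOTE(review): the delete loop below filters on school_id_giga/country_id only, not on
# status, so non-PUBLISHED rows of the same school are deleted as well - confirm this is intended.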
735+
rows_with_more_than_1_record_in_published = sources_models.SchoolMasterData.objects.filter(
736+
status=sources_models.SchoolMasterData.ROW_STATUS_PUBLISHED,
737+
).values('school_id_giga', 'country_id').annotate(
738+
total_records=Count('school_id_giga', distinct=False),
739+
).order_by('-total_records').filter(total_records__gt=1)
740+
741+
logger.debug('Queryset to get all the old records to delete where more than 1 record are in PUBLISHED '
742+
'for same School GIGA ID: {0}'.format(rows_with_more_than_1_record_in_published.query))
743+
744+
for row in rows_with_more_than_1_record_in_published:
745+
for row_to_delete in sources_models.SchoolMasterData.objects.filter(
746+
school_id_giga=row['school_id_giga'],
747+
country_id=row['country_id'],
748+
).order_by('-published_at')[1:]:
749+
row_to_delete.delete()
750+
task_instance.info('Deleted rows where more than 1 record are in PUBLISHED for same School GIGA ID')
751+
752+
# Delete all the old records where more than 1 record are in is_read=True
753+
# by keeping at least 1 PUBLISHED row for same School GIGA ID
734754
rows_with_more_than_1_record_in_read = sources_models.SchoolMasterData.objects.filter(
735755
is_read=True,
736756
).values('school_id_giga', 'country_id').annotate(
737757
total_records=Count('school_id_giga', distinct=False),
738758
).order_by('-total_records').filter(total_records__gt=1)
739759

740-
logger.debug('Queryset to get all the old records to delete where more than 1 record are in is_read=True '
741-
'for same School GIGA ID: {0}'.format(rows_with_more_than_1_record_in_read.query))
760+
logger.debug('Queryset to get all the old records to delete where more than 1 record are in is_read=True'
761+
' by keeping at least 1 PUBLISHED row for same School GIGA ID: {0}'.format(
762+
rows_with_more_than_1_record_in_read.query))
742763

743764
for row in rows_with_more_than_1_record_in_read:
744765
for row_to_delete in sources_models.SchoolMasterData.objects.filter(
745766
school_id_giga=row['school_id_giga'],
746767
country_id=row['country_id'],
747768
).order_by('-published_at')[1:]:
748-
row_to_delete.delete()
749-
task_instance.info('Deleted rows where more than 1 record are in is_read=True for same School GIGA ID')
769+
if row_to_delete.status != sources_models.SchoolMasterData.ROW_STATUS_PUBLISHED:
770+
row_to_delete.delete()
771+
task_instance.info('Deleted rows where more than 1 record are in is_read=True '
772+
'by keeping at least 1 PUBLISHED row for same School GIGA ID')
750773

751774
background_task_utilities.task_on_complete(task_instance)
752775
else:

proco/data_sources/utils.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33
import os
44
from datetime import timedelta
5-
from typing import Optional
5+
from typing import Optional, Any
66

77
import delta_sharing
88
import numpy as np
@@ -100,41 +100,54 @@ def normalize_qos_data_frame(df):
100100
return df
101101

102102

103-
def has_changes_for_review(row, school):
103+
def _values_equal(a: Any, b: Any) -> bool:
104+
"""
105+
Null-safe equality check.
106+
Treats NaN, NaT, and None as equal if both sides are missing.
107+
"""
108+
if (pd.isna(a) and pd.isna(b)) or (a is None and b is None):
109+
return True
110+
return a == b
111+
112+
def has_changes_for_review(row, school) -> bool:
113+
"""
114+
Compare a DataFrame row with an existing School model instance.
115+
Returns True if any meaningful change is detected; False otherwise.
116+
117+
This preserves the original control flow and column accesses, but uses
118+
null-safe comparisons so NaN/NaT/None do not cause false positives.
119+
"""
104120
if school:
105-
if row['school_name'].lower() != school.name.lower():
121+
if not _values_equal(row['school_name'].lower(), school.name.lower()):
106122
return True
107123

108124
old_external_id = None \
109125
if core_utilities.is_blank_string(school.external_id) else str(school.external_id).lower()
110126
new_external_id = None \
111127
if core_utilities.is_blank_string(row['school_id_govt']) else str(row['school_id_govt']).lower()
112-
113-
if old_external_id != new_external_id:
128+
if not _values_equal(old_external_id, new_external_id):
114129
return True
115130

116131
old_admin1_id = None
117132
if school.admin1:
118133
old_admin1_id = str(school.admin1.giga_id_admin).lower()
119134
new_admin1_id = None \
120135
if core_utilities.is_blank_string(row['admin1_id_giga']) else str(row['admin1_id_giga']).lower()
121-
122-
if old_admin1_id != new_admin1_id:
136+
if not _values_equal(old_admin1_id, new_admin1_id):
123137
return True
124138

125139
old_admin2_id = None
126140
if school.admin2:
127141
old_admin2_id = str(school.admin2.giga_id_admin).lower()
128142
new_admin2_id = None \
129143
if core_utilities.is_blank_string(row['admin2_id_giga']) else str(row['admin2_id_giga']).lower()
130-
131-
if old_admin2_id != new_admin2_id:
144+
if not _values_equal(old_admin2_id, new_admin2_id):
132145
return True
133146

134147
old_lat = school.geopoint.y
135148
new_lat = row['latitude']
136149
if (
137-
old_lat != new_lat and
150+
not _values_equal(old_lat, new_lat) and
138151
(
139152
str(old_lat).split('.')[0] != str(new_lat).split('.')[0] or
140153
str(old_lat).split('.')[1][:5] != str(new_lat).split('.')[1][:5]
@@ -145,7 +158,7 @@ def has_changes_for_review(row, school):
145158
old_long = school.geopoint.x
146159
new_long = row['longitude']
147160
if (
148-
old_long != new_long and
161+
not _values_equal(old_long, new_long) and
149162
(
150163
str(old_long).split('.')[0] != str(new_long).split('.')[0] or
151164
str(old_long).split('.')[1][:5] != str(new_long).split('.')[1][:5]
@@ -157,8 +170,7 @@ def has_changes_for_review(row, school):
157170
if core_utilities.is_blank_string(school.education_level) else str(school.education_level).lower()
158171
new_education_level = None \
159172
if core_utilities.is_blank_string(row['education_level']) else str(row['education_level']).lower()
160-
161-
if old_education_level != new_education_level:
173+
if not _values_equal(old_education_level, new_education_level):
162174
return True
163175

164176
school_rt_instance = SchoolRealTimeRegistration.objects.filter(school=school).order_by('-created').first()
@@ -171,7 +183,7 @@ def has_changes_for_review(row, school):
171183
):
172184
new_connectivity_rt = str(row['connectivity_RT']).lower() in core_configs.true_choices
173185

174-
if old_connectivity_rt != new_connectivity_rt:
186+
if not _values_equal(old_connectivity_rt, new_connectivity_rt):
175187
return True
176188
return False
177189
return True

0 commit comments

Comments
 (0)