32
32
# =================================================================
33
33
34
34
import collections
35
+ from datetime import datetime
35
36
import logging
36
37
37
38
import click
48
49
HTTP_OK = 200
49
50
POST_OK = 201
50
51
HEADERS = {'Content-type' : 'application/json' }
52
+ TODAY = datetime .now ().strftime ('%Y-%m-%d' )
51
53
52
54
53
55
class ClimateArchiveLoader (BaseLoader ):
@@ -77,6 +79,7 @@ def create_index(self, index):
77
79
created.
78
80
79
81
:param index: the index to be created.
82
+ :returns: the name of the index created.
80
83
"""
81
84
82
85
if index == 'stations' :
@@ -285,7 +288,7 @@ def create_index(self, index):
285
288
},
286
289
}
287
290
288
- index_name = 'climate_normals_data'
291
+ index_name = f 'climate_normals_data. { TODAY } '
289
292
self .conn .create (index_name , mapping , overwrite = True )
290
293
291
294
if index == 'monthly_summary' :
@@ -372,7 +375,7 @@ def create_index(self, index):
372
375
},
373
376
}
374
377
375
- index_name = 'climate_public_climate_summary'
378
+ index_name = f 'climate_public_climate_summary. { TODAY } '
376
379
self .conn .create (index_name , mapping , overwrite = True )
377
380
378
381
if index == 'daily_summary' :
@@ -487,7 +490,7 @@ def create_index(self, index):
487
490
},
488
491
}
489
492
490
- index_name = 'climate_public_daily_data'
493
+ index_name = f 'climate_public_daily_data. { TODAY } '
491
494
self .conn .create (index_name , mapping , overwrite = True )
492
495
493
496
if index == 'hourly_summary' :
@@ -598,18 +601,20 @@ def create_index(self, index):
598
601
},
599
602
}
600
603
601
- index_name = 'climate_public_hourly_data'
604
+ index_name = f 'climate_public_hourly_data. { TODAY } '
602
605
self .conn .create (index_name , mapping , overwrite = True )
603
606
604
- def generate_stations (self ):
607
+ return index_name
608
+
609
+ def generate_stations (self , index_name ):
605
610
"""
606
611
Queries stations data from the db, and reformats
607
612
data so it can be inserted into Elasticsearch.
608
613
609
614
Returns a generator of dictionaries that represent upsert actions
610
615
into Elasticsearch's bulk API.
611
616
612
- :param cur: oracle cursor to perform queries against .
617
+ :param index_name: name of the index to insert data into .
613
618
:returns: generator of bulk API upsert actions.
614
619
"""
615
620
@@ -656,14 +661,16 @@ def generate_stations(self):
656
661
657
662
action = {
658
663
'_id' : climate_identifier ,
659
- '_index' : 'climate_station_information' ,
664
+ '_index' : index_name ,
660
665
'_op_type' : 'update' ,
661
666
'doc' : wrapper ,
662
667
'doc_as_upsert' : True ,
663
668
}
664
669
yield action
665
670
666
- def generate_normals (self , stn_dict , normals_dict , periods_dict ):
671
+ def generate_normals (
672
+ self , stn_dict , normals_dict , periods_dict , index_name
673
+ ):
667
674
"""
668
675
Queries normals data from the db, and reformats
669
676
data so it can be inserted into Elasticsearch.
@@ -676,6 +683,7 @@ def generate_normals(self, stn_dict, normals_dict, periods_dict):
676
683
:param normals_dict: mapping of normal IDs to normals information.
677
684
:param periods_dict: mapping of normal period IDs to
678
685
normal period information.
686
+ :param index_name: name of the index to insert data into.
679
687
:returns: generator of bulk API upsert actions.
680
688
"""
681
689
@@ -738,7 +746,7 @@ def generate_normals(self, stn_dict, normals_dict, periods_dict):
738
746
}
739
747
action = {
740
748
'_id' : insert_dict ['ID' ],
741
- '_index' : 'climate_normals_data' ,
749
+ '_index' : index_name ,
742
750
'_op_type' : 'update' ,
743
751
'doc' : wrapper ,
744
752
'doc_as_upsert' : True ,
@@ -750,7 +758,7 @@ def generate_normals(self, stn_dict, normals_dict, periods_dict):
750
758
f" records for this station"
751
759
)
752
760
753
- def generate_monthly_data (self , stn_dict , date = None ):
761
+ def generate_monthly_data (self , stn_dict , index_name , date = None ):
754
762
"""
755
763
Queries monthly data from the db, and reformats
756
764
data so it can be inserted into Elasticsearch.
@@ -760,6 +768,7 @@ def generate_monthly_data(self, stn_dict, date=None):
760
768
761
769
:param cur: oracle cursor to perform queries against.
762
770
:param stn_dict: mapping of station IDs to station information.
771
+ :param index_name: name of the index to insert data into.
763
772
:param date: date to start fetching data from.
764
773
:returns: generator of bulk API upsert actions.
765
774
"""
@@ -813,7 +822,7 @@ def generate_monthly_data(self, stn_dict, date=None):
813
822
}
814
823
action = {
815
824
'_id' : insert_dict ['ID' ],
816
- '_index' : 'climate_public_climate_summary' ,
825
+ '_index' : index_name ,
817
826
'_op_type' : 'update' ,
818
827
'doc' : wrapper ,
819
828
'doc_as_upsert' : True ,
@@ -825,7 +834,7 @@ def generate_monthly_data(self, stn_dict, date=None):
825
834
f" records for this station"
826
835
)
827
836
828
- def generate_daily_data (self , stn_dict , date = None ):
837
+ def generate_daily_data (self , stn_dict , index_name , date = None ):
829
838
"""
830
839
Queries daily data from the db, and reformats
831
840
data so it can be inserted into Elasticsearch.
@@ -835,6 +844,7 @@ def generate_daily_data(self, stn_dict, date=None):
835
844
836
845
:param cur: oracle cursor to perform queries against.
837
846
:param stn_dict: mapping of station IDs to station information.
847
+ :param index_name: name of the index to insert data into.
838
848
:param date: date to start fetching data from.
839
849
:returns: generator of bulk API upsert actions.
840
850
"""
@@ -900,7 +910,7 @@ def generate_daily_data(self, stn_dict, date=None):
900
910
}
901
911
action = {
902
912
'_id' : insert_dict ['ID' ],
903
- '_index' : 'climate_public_daily_data' ,
913
+ '_index' : index_name ,
904
914
'_op_type' : 'update' ,
905
915
'doc' : wrapper ,
906
916
'doc_as_upsert' : True ,
@@ -912,7 +922,7 @@ def generate_daily_data(self, stn_dict, date=None):
912
922
f" records for this station"
913
923
)
914
924
915
- def generate_hourly_data (self , stn_dict , date = None ):
925
+ def generate_hourly_data (self , stn_dict , index_name , date = None ):
916
926
"""
917
927
Queries hourly data from the db, and reformats
918
928
data so it can be inserted into Elasticsearch.
@@ -922,6 +932,7 @@ def generate_hourly_data(self, stn_dict, date=None):
922
932
923
933
:param cur: oracle cursor to perform queries against.
924
934
:param stn_dict: mapping of station IDs to station information.
935
+ :param index_name: name of the index to insert data into.
925
936
:param date: date to start fetching data from.
926
937
:returns: generator of bulk API upsert actions.
927
938
"""
@@ -987,7 +998,7 @@ def generate_hourly_data(self, stn_dict, date=None):
987
998
}
988
999
action = {
989
1000
'_id' : insert_dict ['ID' ],
990
- '_index' : 'climate_public_hourly_data' ,
1001
+ '_index' : index_name ,
991
1002
'_op_type' : 'update' ,
992
1003
'doc' : wrapper ,
993
1004
'doc_as_upsert' : True ,
@@ -1184,13 +1195,17 @@ def add(
1184
1195
else :
1185
1196
datasets_to_process = [dataset ]
1186
1197
1198
+ # if none of date, station or starting_from is provided, then this
1199
+ # run is a full reindex rather than a partial update
1200
+ full_reindex = not (date or station or starting_from )
1201
+
1187
1202
click .echo (f'Processing dataset(s): { datasets_to_process } ' )
1188
1203
1189
1204
if 'stations' in datasets_to_process :
1190
1205
try :
1191
1206
click .echo ('Populating stations index' )
1192
- loader .create_index ('stations' )
1193
- stations = loader .generate_stations ()
1207
+ index_name = loader .create_index ('stations' )
1208
+ stations = loader .generate_stations (index_name )
1194
1209
loader .conn .submit_elastic_package (stations , batch_size )
1195
1210
except Exception as err :
1196
1211
msg = f'Could not populate stations index: { err } '
@@ -1202,11 +1217,20 @@ def add(
1202
1217
stn_dict = loader .get_station_data (station , starting_from )
1203
1218
normals_dict = loader .get_normals_data ()
1204
1219
periods_dict = loader .get_normals_periods ()
1205
- loader .create_index ('normals' )
1220
+
1221
+ index_name = loader .create_index ('normals' )
1222
+
1206
1223
normals = loader .generate_normals (
1207
- stn_dict , normals_dict , periods_dict
1224
+ stn_dict , normals_dict , periods_dict , index_name
1225
+ )
1226
+ indexing_succesful = loader .conn .submit_elastic_package (
1227
+ normals , batch_size
1208
1228
)
1209
- loader .conn .submit_elastic_package (normals , batch_size )
1229
+
1230
+ if indexing_succesful and full_reindex :
1231
+ loader .conn .create_alias (
1232
+ 'climate_normals_data' , index_name , overwrite = True
1233
+ )
1210
1234
except Exception as err :
1211
1235
msg = f'Could not populate normals index: { err } '
1212
1236
raise click .ClickException (msg )
@@ -1215,10 +1239,29 @@ def add(
1215
1239
try :
1216
1240
click .echo ('Populating monthly index' )
1217
1241
stn_dict = loader .get_station_data (station , starting_from )
1218
- if not (date or station or starting_from ):
1219
- loader .create_index ('monthly_summary' )
1220
- monthlies = loader .generate_monthly_data (stn_dict , date )
1221
- loader .conn .submit_elastic_package (monthlies , batch_size )
1242
+
1243
+ if full_reindex :
1244
+ index_name = loader .create_index ('monthly_summary' )
1245
+ else :
1246
+ index_name = loader .conn .get_alias_indices ('climate_public_climate_summary' )[0 ] # noqa
1247
+ if index_name is None :
1248
+ raise click .ClickException (
1249
+ 'No associated index found for alias climate_public_climate_summary.' # noqa
1250
+ )
1251
+
1252
+ monthlies = loader .generate_monthly_data (
1253
+ stn_dict , index_name , date
1254
+ )
1255
+ indexing_succesful = loader .conn .submit_elastic_package (
1256
+ monthlies , batch_size
1257
+ )
1258
+
1259
+ if indexing_succesful and full_reindex :
1260
+ loader .conn .create_alias (
1261
+ 'climate_public_climate_summary' ,
1262
+ index_name ,
1263
+ overwrite = True
1264
+ )
1222
1265
except Exception as err :
1223
1266
msg = f'Could not populate montly index: { err } '
1224
1267
raise click .ClickException (msg )
@@ -1227,10 +1270,26 @@ def add(
1227
1270
try :
1228
1271
click .echo ('Populating daily index' )
1229
1272
stn_dict = loader .get_station_data (station , starting_from )
1230
- if not (date or station or starting_from ):
1231
- loader .create_index ('daily_summary' )
1232
- dailies = loader .generate_daily_data (stn_dict , date )
1233
- loader .conn .submit_elastic_package (dailies , batch_size )
1273
+
1274
+ if full_reindex :
1275
+ index_name = loader .create_index ('daily_summary' )
1276
+ else :
1277
+ index_name = loader .conn .get_alias_indices ('climate_public_daily_data' )[0 ] # noqa
1278
+ if index_name is None :
1279
+ raise click .ClickException (
1280
+ 'No index found for alias climate_public_daily_data.'
1281
+ )
1282
+
1283
+ dailies = loader .generate_daily_data (stn_dict , index_name , date )
1284
+ indexing_succesful = loader .conn .submit_elastic_package (
1285
+ dailies , batch_size
1286
+ )
1287
+
1288
+ if indexing_succesful and full_reindex :
1289
+ loader .conn .create_alias (
1290
+ 'climate_public_daily_data' , index_name , overwrite = True
1291
+ )
1292
+
1234
1293
except Exception as err :
1235
1294
msg = f'Could not populate daily index: { err } '
1236
1295
raise click .ClickException (msg )
@@ -1239,10 +1298,26 @@ def add(
1239
1298
try :
1240
1299
click .echo ('Populating hourly index' )
1241
1300
stn_dict = loader .get_station_data (station , starting_from )
1242
- if not (date or station or starting_from ):
1301
+
1302
+ if full_reindex :
1243
1303
loader .create_index ('hourly_summary' )
1244
- hourlies = loader .generate_hourly_data (stn_dict , date )
1245
- loader .conn .submit_elastic_package (hourlies , batch_size )
1304
+ else :
1305
+ index_name = loader .conn .get_alias_indices ('climate_public_hourly_data' )[0 ] # noqa
1306
+ if index_name is None :
1307
+ raise click .ClickException (
1308
+ 'No index found for alias climate_public_hourly_data.'
1309
+ )
1310
+
1311
+ hourlies = loader .generate_hourly_data (stn_dict , index_name , date )
1312
+ indexing_succesful = loader .conn .submit_elastic_package (
1313
+ hourlies , batch_size
1314
+ )
1315
+
1316
+ if indexing_succesful and full_reindex :
1317
+ loader .conn .create_alias (
1318
+ 'climate_public_hourly_data' , index_name , overwrite = True
1319
+ )
1320
+
1246
1321
except Exception as err :
1247
1322
msg = f'Could not populate hourly index: { err } '
1248
1323
raise click .ClickException (msg )
0 commit comments