@@ -23,7 +23,7 @@
 from pathlib import Path
 from queue import Empty, Queue
 from threading import Event
-from typing import Dict, List, Optional
+from typing import Dict, List, NamedTuple, Optional

 import psycopg2
 from rohmu import dates, get_transfer, rohmufile
@@ -39,7 +39,7 @@
     replication_connection_string_and_slot_using_pgpass, write_json_file
 )
 from pghoard.compressor import (
-    CompressionEvent, CompressionQueue, CompressorThread, WALFileDeleterThread, WalFileDeletionQueue
+    CompressionEvent, CompressionQueue, CompressorThread, WALFileDeleterThread, WalFileDeletionEvent, WalFileDeletionQueue
 )
 from pghoard.preservation_request import (
     is_basebackup_preserved, parse_preservation_requests, patch_basebackup_metadata_with_preservation
@@ -56,6 +56,14 @@ class DeltaBaseBackupFailureInfo:
     retries: int = 0


+class BackupSitePaths(NamedTuple):
+    compressed_xlog_path: str
+    compressed_timeline_path: str
+    uncompressed_files_path: str
+    basebackup_path: str
+    uncompressed_basebackup_path: str
+
+
 class InotifyAdapter:
     def __init__(self, queue):
         self.queue = queue
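A note on the iteration this enables: because `BackupSitePaths` is a `NamedTuple`, an instance is an ordinary tuple of its field values, which is what lets `create_backup_site_paths` below loop over it to create every directory without maintaining a separate list. A minimal standalone sketch (the example paths are invented for illustration):

```python
from typing import NamedTuple

class BackupSitePaths(NamedTuple):
    compressed_xlog_path: str
    compressed_timeline_path: str
    uncompressed_files_path: str
    basebackup_path: str
    uncompressed_basebackup_path: str

paths = BackupSitePaths(
    compressed_xlog_path="/backups/default/xlog",
    compressed_timeline_path="/backups/default/timeline",
    uncompressed_files_path="/backups/default/xlog_incoming",
    basebackup_path="/backups/default/basebackup",
    uncompressed_basebackup_path="/backups/default/basebackup_incoming",
)

# Iterating yields the field values in declaration order...
for path in paths:
    print(path)

# ...while each path stays addressable by name.
assert paths.basebackup_path == "/backups/default/basebackup"
```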
@@ -287,24 +295,25 @@ def start_walreceiver(self, site, chosen_backup_node, last_flushed_lsn):
     def _get_site_prefix(self, site):
         return self.config["backup_sites"][site]["prefix"]

-    def create_backup_site_paths(self, site):
+    def create_backup_site_paths(self, site: str) -> BackupSitePaths:
         site_path = os.path.join(self.config["backup_location"], self._get_site_prefix(site))
         xlog_path = os.path.join(site_path, "xlog")
+        timeline_path = os.path.join(site_path, "timeline")
         basebackup_path = os.path.join(site_path, "basebackup")

-        paths_to_create = [
-            site_path,
-            xlog_path,
-            xlog_path + "_incoming",
-            basebackup_path,
-            basebackup_path + "_incoming",
-        ]
+        backup_site_paths = BackupSitePaths(
+            uncompressed_files_path=xlog_path + "_incoming",
+            compressed_xlog_path=xlog_path,
+            compressed_timeline_path=timeline_path,
+            basebackup_path=basebackup_path,
+            uncompressed_basebackup_path=basebackup_path + "_incoming",
+        )

-        for path in paths_to_create:
+        for path in backup_site_paths:
             if not os.path.exists(path):
                 os.makedirs(path)

-        return xlog_path, basebackup_path
+        return backup_site_paths

     def delete_remote_wal_before(self, wal_segment, site, pg_version):
         self.log.info("Starting WAL deletion from: %r before: %r, pg_version: %r", site, wal_segment, pg_version)
@@ -577,12 +586,15 @@ def startup_walk_for_missed_files(self):
         """Check xlog and xlog_incoming directories for files that receivexlog has received but not yet
         compressed as well as the files we have compressed but not yet uploaded and process them."""
         for site in self.config["backup_sites"]:
-            compressed_xlog_path, _ = self.create_backup_site_paths(site)
-            uncompressed_xlog_path = compressed_xlog_path + "_incoming"
+            backup_site_paths = self.create_backup_site_paths(site)
+
+            compressed_xlog_path = backup_site_paths.compressed_xlog_path
+            compressed_timeline_path = backup_site_paths.compressed_timeline_path
+            uncompressed_files_path = backup_site_paths.uncompressed_files_path

             # Process uncompressed files (ie WAL pg_receivexlog received)
-            for filename in os.listdir(uncompressed_xlog_path):
-                full_path = os.path.join(uncompressed_xlog_path, filename)
+            for filename in os.listdir(uncompressed_files_path):
+                full_path = os.path.join(uncompressed_files_path, filename)
                 if wal.PARTIAL_WAL_RE.match(filename):
                     # pg_receivewal may have been in the middle of storing WAL file when PGHoard was stopped.
                     # If the file is 0 or 16 MiB in size it will continue normally but in some cases the file can be
@@ -609,6 +621,34 @@ def startup_walk_for_missed_files(self):

                 filetype = FileType.Timeline if wal.TIMELINE_RE.match(filename) else FileType.Wal

+                # verify if file was already compressed, otherwise the transfer agent will encounter
+                # duplicated UploadEvents. In case it was compressed, we should just add it to the deletion queue
+                base_compressed_file_path = (
+                    compressed_timeline_path if filetype is FileType.Timeline else compressed_xlog_path
+                )
+                compressed_file_path = os.path.join(base_compressed_file_path, filename)
+                is_already_compressed = os.path.exists(compressed_file_path)
+                has_metadata_file = os.path.exists(compressed_file_path + ".metadata")
+
+                # the file was compressed correctly
+                if is_already_compressed and has_metadata_file:
+                    self.log.debug("Uncompressed file %r is already compressed, adding to deletion queue.", full_path)
+                    if filetype is FileType.Timeline:
+                        os.unlink(full_path)
+                    else:
+                        delete_request = WalFileDeletionEvent(backup_site_name=site, file_path=Path(full_path))
+                        self.log.info("Adding an Uncompressed WAL file to deletion queue: %s", full_path)
+                        self.wal_file_deletion_queue.put(delete_request)
+                    continue
+
+                # delete compressed file and re-try
+                if is_already_compressed and not has_metadata_file:
+                    self.log.info(
+                        "Deleting incomplete compressed file %r (missing metadata), compression will be re-tried",
+                        compressed_file_path,
+                    )
+                    os.unlink(compressed_file_path)
+
                 compression_event = CompressionEvent(
                     file_type=filetype,
                     file_path=FileTypePrefixes[filetype] / filename,
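The added block encodes a three-way decision for each leftover uncompressed file: a complete compressed copy (payload plus `.metadata`) means skip recompression and dispose of the original; a compressed copy without metadata is treated as an interrupted compression and deleted so the file is retried; otherwise the file falls through to the normal `CompressionEvent` path. A hedged standalone reduction of that logic (the `FileType` enum here is a stand-in for pghoard's, and logging/queues are elided):

```python
import os
from enum import Enum

class FileType(Enum):  # stand-in for pghoard's FileType
    Wal = "wal"
    Timeline = "timeline"

def classify_leftover(compressed_file_path: str, filetype: FileType) -> str:
    """Return the action the startup walk would take for one leftover file."""
    is_already_compressed = os.path.exists(compressed_file_path)
    has_metadata_file = os.path.exists(compressed_file_path + ".metadata")

    if is_already_compressed and has_metadata_file:
        # Complete result exists: never recompress; dispose of the uncompressed
        # copy (timeline files inline, WAL files via the deleter thread's queue).
        return "unlink" if filetype is FileType.Timeline else "queue_for_deletion"

    if is_already_compressed and not has_metadata_file:
        # Compression was interrupted before the .metadata sidecar was written:
        # drop the partial output so the file goes through compression again.
        os.unlink(compressed_file_path)

    return "recompress"
```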
@@ -622,32 +662,35 @@ def startup_walk_for_missed_files(self):
                 self.compression_queue.put(compression_event)

             # Process compressed files (ie things we've processed but not yet uploaded)
-            for filename in os.listdir(compressed_xlog_path):
-                if filename.endswith(".metadata"):
-                    continue  # silently ignore .metadata files, they're expected and processed below
-                full_path = os.path.join(compressed_xlog_path, filename)
-                metadata_path = full_path + ".metadata"
-                is_xlog = wal.WAL_RE.match(filename)
-                is_timeline = wal.TIMELINE_RE.match(filename)
-                if not ((is_xlog or is_timeline) and os.path.exists(metadata_path)):
-                    self.log.warning("Found invalid file %r from compressed xlog directory", full_path)
-                    continue
-                with open(metadata_path, "r") as fp:
-                    metadata = json.load(fp)
-
-                file_type = FileType.Wal if is_xlog else FileType.Timeline
-
-                transfer_event = UploadEvent(
-                    file_type=file_type,
-                    backup_site_name=site,
-                    file_size=os.path.getsize(full_path),
-                    file_path=FileTypePrefixes[file_type] / filename,
-                    source_data=Path(full_path),
-                    callback_queue=None,
-                    metadata=metadata
-                )
-                self.log.debug("Found: %r when starting up, adding to transfer queue", transfer_event)
-                self.transfer_queue.put(transfer_event)
+            for compressed_file_dir in [compressed_xlog_path, compressed_timeline_path]:
+                for filename in os.listdir(compressed_file_dir):
+                    if filename.endswith(".metadata"):
+                        continue  # silently ignore .metadata files, they're expected and processed below
+
+                    full_path = os.path.join(compressed_file_dir, filename)
+                    metadata_path = full_path + ".metadata"
+                    is_xlog = wal.WAL_RE.match(filename)
+                    is_timeline = wal.TIMELINE_RE.match(filename)
+
+                    if not ((is_xlog or is_timeline) and os.path.exists(metadata_path)):
+                        self.log.warning("Found invalid file %r from compressed xlog directory", full_path)
+                        continue
+                    with open(metadata_path, "r") as fp:
+                        metadata = json.load(fp)
+
+                    file_type = FileType.Wal if is_xlog else FileType.Timeline
+
+                    transfer_event = UploadEvent(
+                        file_type=file_type,
+                        backup_site_name=site,
+                        file_size=os.path.getsize(full_path),
+                        file_path=FileTypePrefixes[file_type] / filename,
+                        source_data=Path(full_path),
+                        callback_queue=None,
+                        metadata=metadata
+                    )
+                    self.log.debug("Found: %r when starting up, adding to transfer queue", transfer_event)
+                    self.transfer_queue.put(transfer_event)

     def start_threads_on_startup(self):
         # Startup threads
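With timeline files now compressed into their own directory, the upload recovery scan walks two directories with identical logic. The traversal reduces to a flat stream of (directory, filename) pairs; a small sketch of the equivalent generator (names mirror the hunk, nothing here is pghoard API):

```python
import os
from typing import Iterator, Tuple

def compressed_candidates(compressed_xlog_path: str, compressed_timeline_path: str) -> Iterator[Tuple[str, str]]:
    # Yield (directory, filename) pairs in the order the nested loops above visit
    # them, skipping the .metadata sidecars that are read alongside their payloads.
    for compressed_file_dir in (compressed_xlog_path, compressed_timeline_path):
        for filename in os.listdir(compressed_file_dir):
            if not filename.endswith(".metadata"):
                yield compressed_file_dir, filename
```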
@@ -668,7 +711,7 @@ def _cleanup_inactive_receivexlogs(self, site):

     def handle_site(self, site, site_config):
         self.set_state_defaults(site)
-        xlog_path, basebackup_path = self.create_backup_site_paths(site)
+        backup_site_paths = self.create_backup_site_paths(site)

         if not site_config["active"]:
             return  # If a site has been marked inactive, don't bother checking anything
@@ -679,7 +722,7 @@ def handle_site(self, site, site_config):

         if site not in self.receivexlogs and site not in self.walreceivers:
             if site_config["active_backup_mode"] == "pg_receivexlog":
-                self.receivexlog_listener(site, chosen_backup_node, xlog_path + "_incoming")
+                self.receivexlog_listener(site, chosen_backup_node, backup_site_paths.uncompressed_files_path)
             elif site_config["active_backup_mode"] == "walreceiver":
                 state_file_path = self.config["json_state_file_path"]
                 walreceiver_state = {}
@@ -745,7 +788,13 @@ def handle_site(self, site, site_config):
             return

         self.basebackups_callbacks[site] = Queue()
-        self.create_basebackup(site, chosen_backup_node, basebackup_path, self.basebackups_callbacks[site], metadata)
+        self.create_basebackup(
+            site=site,
+            connection_info=chosen_backup_node,
+            basebackup_path=backup_site_paths.basebackup_path,
+            callback_queue=self.basebackups_callbacks[site],
+            metadata=metadata,
+        )

     def get_new_backup_details(self, *, now=None, site, site_config):
         """Returns metadata to associate with new backup that needs to be created or None in case no backup should