1313
1414files_cache_logger = create_logger ("borg.debug.files_cache" )
1515
16+ from borgstore .store import ItemInfo
17+
1618from .constants import CACHE_README , FILES_CACHE_MODE_DISABLED , ROBJ_FILE_STREAM , TIME_DIFFERS2_NS
1719from .checksums import xxh64
1820from .hashindex import ChunkIndex , ChunkIndexEntry
@@ -663,63 +665,125 @@ def memorize_file(self, hashed_path, path_hash, st, chunks):
663665 )
664666
665667
def try_upgrade_to_b14(repository):
    """Remove a pre-b14 chunk index cache from the repository, if present.

    Any cached chunk index from before b14 is invalid due to the
    refcount -> flags change and due to the different CHUNKINDEX_HASH_SEED,
    so it is simply deleted here.

    TODO: remove this before 2.0.0 release
    """
    for key in ("chunks_hash", "chunks"):
        try:
            repository.store_delete(f"cache/{key}")
        except (Repository.ObjectNotFound, StoreObjectNotFound):
            # nothing to delete - repository was likely upgraded already
            pass
678+
def list_chunkindex_hashes(repository):
    """Return the sorted hex hashes of all chunk indexes cached in the repository."""
    # the store_list RPC does not return namedtuples, so rebuild ItemInfo here.
    infos = (ItemInfo(*entry) for entry in repository.store_list("cache"))
    hashes = sorted(info.name.removeprefix("chunks.") for info in infos if info.name.startswith("chunks."))
    logger.debug(f"cached chunk indexes: {hashes}")
    return hashes
689+
690+
def delete_chunkindex_cache(repository):
    """Delete all cached chunk indexes (cache/chunks.*) from the repository."""
    hashes = list_chunkindex_hashes(repository)
    for hex_hash in hashes:
        try:
            repository.store_delete(f"cache/chunks.{hex_hash}")
        except (Repository.ObjectNotFound, StoreObjectNotFound):
            # TODO: ^ seems like RemoteRepository raises Repository.ONF instead of StoreONF
            pass
    logger.debug(f"cached chunk indexes deleted: {hashes}")
675701
676702
# seed for the xxh64 hash over the serialized chunks index; entries cached with
# a different seed do not validate (see try_upgrade_to_b14, which drops them).
CHUNKINDEX_HASH_SEED = 2
678704
679705
def write_chunkindex_to_repo_cache(
    repository, chunks, *, clear=False, force_write=False, delete_other=False, delete_these=None
):
    """Serialize *chunks* and store it in the repository as cache/chunks.<hex_xxh64>.

    The xxh64 of the serialized data is part of the stored name: a client loading
    a cached chunks index compares the data's xxh64 against the hash in the name -
    if they match, the cache is valid; if not, it is corrupted or out of date and
    has to be discarded. When some functionality is DELETING chunks from the
    repository, it has to delete all existing cache/chunks.* and maybe write a new,
    valid cache/chunks.<hash>, so that all clients will discard any client-local
    chunks index caches.

    :param chunks: the in-memory ChunkIndex to serialize.
    :param clear: free the in-memory index immediately after serializing it.
    :param force_write: store even if an identical index is already cached.
    :param delete_other: delete all previously cached chunk indexes.
    :param delete_these: iterable of hex hashes of cached indexes to delete
                         (ignored if delete_other is given).
    :return: hex xxh64 of the (possibly just written) serialized index.
    """
    cached_hashes = list_chunkindex_hashes(repository)
    with io.BytesIO() as f:
        chunks.write(f)
        data = f.getvalue()
    if clear:
        # if we don't need the in-memory chunks index anymore:
        chunks.clear()  # free memory, immediately
    new_hash = bin_to_hex(xxh64(data, seed=CHUNKINDEX_HASH_SEED))
    if force_write or new_hash not in cached_hashes:
        cache_name = f"cache/chunks.{new_hash}"
        logger.debug(f"caching chunks index as {cache_name} in repository...")
        repository.store_store(cache_name, data)
    # determine which previously cached indexes shall be removed
    # (was: if/elif pass/else chain; also avoid shadowing builtin `hash`):
    if delete_other:
        delete_these = cached_hashes
    elif delete_these is None:
        delete_these = []
    for hex_hash in delete_these:
        try:
            repository.store_delete(f"cache/chunks.{hex_hash}")
        except (Repository.ObjectNotFound, StoreObjectNotFound):
            # TODO: ^ seem like RemoteRepository raises Repository.ONF instead of StoreONF
            pass
    if delete_these:
        logger.debug(f"cached chunk indexes deleted: {delete_these}")
    return new_hash
702744
703745
def read_chunkindex_from_repo_cache(repository, hash):
    """Load the cached chunks index named cache/chunks.<hash> from the repository.

    Returns a ChunkIndex if the cached data validates against *hash*,
    otherwise (missing or invalid cache entry) returns None.
    """
    cache_name = f"cache/chunks.{hash}"
    logger.debug(f"trying to load {cache_name} from the repo...")
    try:
        chunks_data = repository.store_load(cache_name)
    except (Repository.ObjectNotFound, StoreObjectNotFound):
        # TODO: ^ seem like RemoteRepository raises Repository.ONF instead of StoreONF
        logger.debug(f"{cache_name} not found in the repository.")
        return None
    if xxh64(chunks_data, seed=CHUNKINDEX_HASH_SEED) != hex_to_bin(hash):
        logger.debug(f"{cache_name} is invalid.")
        return None
    logger.debug(f"{cache_name} is valid.")
    with io.BytesIO(chunks_data) as f:
        return ChunkIndex.read(f)
763+
704764def build_chunkindex_from_repo (repository , * , disable_caches = False , cache_immediately = False ):
705- chunks = None
706- # first, try to load a pre-computed and centrally cached chunks index :
765+ try_upgrade_to_b14 ( repository )
766+ # first, try to build a fresh, mostly complete chunk index from centrally cached chunk indexes :
707767 if not disable_caches :
708- wanted_hash = load_chunks_hash (repository )
709- logger .debug (f"trying to get cached chunk index (id { bin_to_hex (wanted_hash )} ) from the repo..." )
710- try :
711- chunks_data = repository .store_load ("cache/chunks" )
712- except (Repository .ObjectNotFound , StoreObjectNotFound ):
713- # TODO: ^ seem like RemoteRepository raises Repository.ONF instead of StoreONF
714- logger .debug ("cache/chunks not found in the repository." )
715- else :
716- if xxh64 (chunks_data , seed = CHUNKINDEX_HASH_SEED ) == wanted_hash :
717- logger .debug ("cache/chunks is valid." )
718- with io .BytesIO (chunks_data ) as f :
719- chunks = ChunkIndex .read (f )
768+ hashes = list_chunkindex_hashes (repository )
769+ if hashes : # we have at least one cached chunk index!
770+ merged = 0
771+ chunks = ChunkIndex () # we'll merge all we find into this
772+ for hash in hashes :
773+ chunks_to_merge = read_chunkindex_from_repo_cache (repository , hash )
774+ if chunks_to_merge is not None :
775+ logger .debug (f"cached chunk index { hash } gets merged..." )
776+ for k , v in chunks_to_merge .items ():
777+ chunks [k ] = v
778+ merged += 1
779+ chunks_to_merge .clear ()
780+ if merged > 0 :
781+ if merged > 1 and cache_immediately :
782+ # immediately update cache/chunks, so we don't have to merge these again:
783+ write_chunkindex_to_repo_cache (
784+ repository , chunks , clear = False , force_write = True , delete_these = hashes
785+ )
720786 return chunks
721- else :
722- logger .debug ("cache/chunks is invalid." )
723787 # if we didn't get anything from the cache, compute the ChunkIndex the slow way:
724788 logger .debug ("querying the chunk IDs list from the repo..." )
725789 chunks = ChunkIndex ()
@@ -741,7 +805,7 @@ def build_chunkindex_from_repo(repository, *, disable_caches=False, cache_immedi
741805 logger .debug (f"queried { num_chunks } chunk IDs in { duration } s, ~{ speed } /s" )
742806 if cache_immediately :
743807 # immediately update cache/chunks, so we only rarely have to do it the slow way:
744- write_chunkindex_to_repo_cache (repository , chunks , clear = False , force_write = True )
808+ write_chunkindex_to_repo_cache (repository , chunks , clear = False , force_write = True , delete_other = True )
745809 return chunks
746810
747811
0 commit comments