@@ -795,12 +795,6 @@ impl<B: Blob> Append<B> {
795795 }
796796
797797 /// Durably rewrite a committed page to a shorter partial length.
798- ///
799- /// Since larger valid lengths are authoritative, a shorter CRC cannot simply be written next to
800- /// the old CRC. We first stage the shorter slot with length 0, then make its length durable,
801- /// then clear the old slot's length bytes. A crash during any phase recovers either the old
802- /// longer page or the new shorter page, but never loses the whole page or fabricates a larger
803- /// length.
804798 async fn sync_partial_page_shrink (
805799 blob : & B ,
806800 page : u64 ,
@@ -809,6 +803,9 @@ impl<B: Blob> Append<B> {
809803 new_crc : u32 ,
810804 old_crc : & Checksum ,
811805 ) -> Result < Checksum , Error > {
806+ // Since larger valid lengths are authoritative, a shorter slot cannot simply be written
807+ // next to the old slot. We stage the shorter slot with length 0, make its length durable,
808+ // then clear the previous slot's length bytes.
812809 let physical_page_size = logical_page_size
813810 . checked_add ( CHECKSUM_SIZE )
814811 . ok_or ( Error :: OffsetOverflow ) ?;
@@ -817,33 +814,33 @@ impl<B: Blob> Append<B> {
817814 . and_then ( |start| start. checked_add ( logical_page_size) )
818815 . ok_or ( Error :: OffsetOverflow ) ?;
819816
820- let new_slot_start = if old_crc. len1 >= old_crc. len2 {
821- CHECKSUM_SLOT_SIZE
817+ let ( new_slot_start, old_slot_start ) = if old_crc. len1 >= old_crc. len2 {
818+ ( CHECKSUM_SLOT_SIZE , 0 )
822819 } else {
823- 0
820+ ( 0 , CHECKSUM_SLOT_SIZE )
824821 } ;
825822 let new_slot_offset = crc_start
826823 . checked_add ( new_slot_start as u64 )
827824 . ok_or ( Error :: OffsetOverflow ) ?;
825+
826+ // Stage the new slot with a 0 length and the shrunken page CRC.
828827 let staged_slot = Self :: checksum_slot_bytes ( 0 , new_crc) ;
829828 blob. write_at ( new_slot_offset, staged_slot. to_vec ( ) ) . await ?;
830829 blob. sync ( ) . await ?;
831830
831+ // Make the new shrunken length durable. We cannot write the CRC and new length in one go
832+ // without introducing crash safety concerns due to partial writes.
832833 blob. write_at ( new_slot_offset, new_len. to_be_bytes ( ) . to_vec ( ) )
833834 . await ?;
834835 blob. sync ( ) . await ?;
835836
836- let old_slot_start = if new_slot_start == 0 {
837- CHECKSUM_SLOT_SIZE
838- } else {
839- 0
840- } ;
841837 let old_slot_offset = crc_start
842838 . checked_add ( old_slot_start as u64 )
843839 . ok_or ( Error :: OffsetOverflow ) ?;
844840 let len_size = std:: mem:: size_of :: < u16 > ( ) ;
845841
846- // A slot with length 0 is invalid regardless of its CRC.
842+ // Clear the old slot's length. A slot with length 0 is never authoritative, so the slot
843+ // representing the shrunken page becomes authoritative.
847844 blob. write_at ( old_slot_offset, vec ! [ 0u8 ; len_size] ) . await ?;
848845 blob. sync ( ) . await ?;
849846
@@ -983,24 +980,23 @@ impl<B: Blob> Append<B> {
983980 full_pages * physical_page_size
984981 } ;
985982
983+ // Truncate the blob, then drop cached pages at or beyond the new tail. Future appends may
984+ // reuse those logical offsets, and cache-only reads must not see pre-shrink bytes there.
985+ blob_guard. blob . resize ( new_physical_size) . await ?;
986+ self . cache_ref . invalidate_from ( self . id , full_pages) ;
987+
986988 if partial_bytes > 0 {
987989 self . shrink_protected_partial (
988990 & mut buf_guard,
989991 & mut blob_guard,
990- new_physical_size,
991992 full_pages,
992993 partial_bytes,
993994 logical_page_size,
994995 )
995996 . await
996997 } else {
997- self . shrink_standard (
998- & mut buf_guard,
999- & mut blob_guard,
1000- new_physical_size,
1001- full_pages,
1002- )
1003- . await
998+ self . shrink_standard ( & mut buf_guard, & mut blob_guard, full_pages)
999+ . await
10041000 }
10051001 }
10061002
@@ -1009,18 +1005,10 @@ impl<B: Blob> Append<B> {
10091005 & self ,
10101006 buf_guard : & mut Buffer ,
10111007 blob_guard : & mut BlobState < B > ,
1012- new_physical_size : u64 ,
10131008 full_pages : u64 ,
10141009 partial_bytes : u64 ,
10151010 logical_page_size : u64 ,
10161011 ) -> Result < ( ) , Error > {
1017- blob_guard. blob . resize ( new_physical_size) . await ?;
1018-
1019- // Evict cached pages at or beyond the new full-page boundary. The page at
1020- // `full_pages` is now owned by the tip buffer, and anything above is beyond the new
1021- // logical size.
1022- self . cache_ref . invalidate_from ( self . id , full_pages) ;
1023-
10241012 // Update blob state and buffer based on the desired logical size. The page data is
10251013 // read with CRC validation, then durably rewritten below with a shorter CRC.
10261014 blob_guard. current_page = full_pages;
@@ -1061,17 +1049,10 @@ impl<B: Blob> Append<B> {
10611049 & self ,
10621050 buf_guard : & mut Buffer ,
10631051 blob_guard : & mut BlobState < B > ,
1064- new_physical_size : u64 ,
10651052 full_pages : u64 ,
10661053 ) -> Result < ( ) , Error > {
1067- // Resize the underlying blob.
1068- blob_guard. blob . resize ( new_physical_size) . await ?;
10691054 blob_guard. partial_page_state = None ;
10701055
1071- // Evict cached pages at or beyond the new full-page boundary. Leaving pre-resize contents
1072- // in the cache lets `try_read_sync` observe stale bytes once the tip is repopulated.
1073- self . cache_ref . invalidate_from ( self . id , full_pages) ;
1074-
10751056 // Update blob state and buffer based on the desired logical size.
10761057 blob_guard. current_page = full_pages;
10771058 buf_guard. offset = full_pages * self . cache_ref . page_size ( ) ;
0 commit comments