From 84b35a5e77adfb2922117683dd07a0d173a5c1f7 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Tue, 6 May 2025 23:46:42 -0700 Subject: [PATCH 01/20] Handle free list revivification in RMW --- .../ClientSession/SessionFunctionsWrapper.cs | 2 +- .../core/Index/Interfaces/CallbackInfos.cs | 2 + .../Index/Tsavorite/Implementation/Helpers.cs | 50 +++++++++++++++++++ .../Implementation/InternalDelete.cs | 45 +---------------- .../Tsavorite/Implementation/InternalRMW.cs | 22 ++++++-- 5 files changed, 73 insertions(+), 48 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/ClientSession/SessionFunctionsWrapper.cs b/libs/storage/Tsavorite/cs/src/core/ClientSession/SessionFunctionsWrapper.cs index 9f3eb138255..49feed18b1a 100644 --- a/libs/storage/Tsavorite/cs/src/core/ClientSession/SessionFunctionsWrapper.cs +++ b/libs/storage/Tsavorite/cs/src/core/ClientSession/SessionFunctionsWrapper.cs @@ -101,7 +101,7 @@ public bool PostCopyUpdater(ref TKey key, ref TInput input, ref TValue oldValue, [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool InPlaceUpdater(long physicalAddress, ref TKey key, ref TInput input, ref TValue value, ref TOutput output, ref RMWInfo rmwInfo, out OperationStatus status, ref RecordInfo recordInfo) { - (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, _) = _clientSession.store.GetRecordLengths(physicalAddress, ref value, ref recordInfo); + (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength) = _clientSession.store.GetRecordLengths(physicalAddress, ref value, ref recordInfo); if (_clientSession.functions.InPlaceUpdater(ref key, ref input, ref value, ref output, ref rmwInfo, ref recordInfo)) { diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs b/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs index bb5c7d7d07c..6f78b6160b6 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs @@ -214,6 +214,8 @@ public struct RMWInfo /// public int FullValueLength { get; internal set; } + public int FullRecordLength { get; internal set; } + /// /// If set true by CopyUpdater, the source record for the RCU will not be elided from the tag chain even if this is otherwise possible. /// diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs index cee1505547a..52dbfd5cb49 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs @@ -253,5 +253,55 @@ private bool FindTagAndTryTransientSLock( + TSessionFunctionsWrapper sessionFunctions, ref OperationStackContext stackCtx, ref RecordInfo srcRecordInfo, int usedValueLength, int fullValueLength, int fullRecordLength) + where TSessionFunctionsWrapper : ISessionFunctionsWrapper + { + if (!RevivificationManager.IsEnabled) + { + // We are not doing revivification, so we just want to remove the record from the tag chain so we don't potentially do an IO later for key + // traceback. If we succeed, we need to SealAndInvalidate. It's fine if we don't succeed here; this is just tidying up the HashBucket. + if (stackCtx.hei.TryElide()) + srcRecordInfo.SealAndInvalidate(); + } + else if (RevivificationManager.UseFreeRecordPool) + { + // For non-FreeRecordPool revivification, we leave the record in as a normal tombstone so we can revivify it in the chain for the same key. + // For FreeRecord Pool we must first Seal here, even if we're using the LockTable, because the Sealed state must survive this Delete() call. + // We invalidate it also for checkpoint/recovery consistency (this removes Sealed bit so Scan would enumerate records that are not in any + // tag chain--they would be in the freelist if the freelist survived Recovery), but we restore the Valid bit if it is returned to the chain, + // which due to epoch protection is guaranteed to be done before the record can be written to disk and violate the "No Invalid records in + // tag chain" invariant. + srcRecordInfo.SealAndInvalidate(); + + bool isElided = false, isAdded = false; + Debug.Assert(srcRecordInfo.Tombstone, $"Unexpected loss of Tombstone; Record should have been XLocked or SealInvalidated. RecordInfo: {srcRecordInfo.ToString()}"); + (isElided, isAdded) = TryElideAndTransferToFreeList(sessionFunctions, ref stackCtx, ref srcRecordInfo, + (usedValueLength, fullValueLength, fullRecordLength)); + + if (!isElided) + { + // Leave this in the chain as a normal Tombstone; we aren't going to add a new record so we can't leave this one sealed. + srcRecordInfo.UnsealAndValidate(); + } + else if (!isAdded && RevivificationManager.restoreDeletedRecordsIfBinIsFull) + { + // The record was not added to the freelist, but was elided. See if we can put it back in as a normal Tombstone. Since we just + // elided it and the elision criteria is that it is the only above-BeginAddress record in the chain, and elision sets the + // HashBucketEntry.word to 0, it means we do not expect any records for this key's tag to exist after the elision. Therefore, + // we can re-insert the record iff the HashBucketEntry's address is <= kTempInvalidAddress. + stackCtx.hei = new(stackCtx.hei.hash); + FindOrCreateTag(ref stackCtx.hei, hlogBase.BeginAddress); + + if (stackCtx.hei.entry.Address <= Constants.kTempInvalidAddress && stackCtx.hei.TryCAS(stackCtx.recSrc.LogicalAddress)) + srcRecordInfo.UnsealAndValidate(); + } + } + } } } \ No newline at end of file diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalDelete.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalDelete.cs index ea977dd681f..218b8c0b822 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalDelete.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalDelete.cs @@ -119,51 +119,10 @@ internal OperationStatus InternalDelete(sessionFunctions, ref stackCtx, ref srcRecordInfo)) { - if (!RevivificationManager.IsEnabled) - { - // We are not doing revivification, so we just want to remove the record from the tag chain so we don't potentially do an IO later for key - // traceback. If we succeed, we need to SealAndInvalidate. It's fine if we don't succeed here; this is just tidying up the HashBucket. - if (stackCtx.hei.TryElide()) - srcRecordInfo.SealAndInvalidate(); - } - else if (RevivificationManager.UseFreeRecordPool) - { - // For non-FreeRecordPool revivification, we leave the record in as a normal tombstone so we can revivify it in the chain for the same key. - // For FreeRecord Pool we must first Seal here, even if we're using the LockTable, because the Sealed state must survive this Delete() call. - // We invalidate it also for checkpoint/recovery consistency (this removes Sealed bit so Scan would enumerate records that are not in any - // tag chain--they would be in the freelist if the freelist survived Recovery), but we restore the Valid bit if it is returned to the chain, - // which due to epoch protection is guaranteed to be done before the record can be written to disk and violate the "No Invalid records in - // tag chain" invariant. - srcRecordInfo.SealAndInvalidate(); - - bool isElided = false, isAdded = false; - Debug.Assert(srcRecordInfo.Tombstone, $"Unexpected loss of Tombstone; Record should have been XLocked or SealInvalidated. RecordInfo: {srcRecordInfo.ToString()}"); - (isElided, isAdded) = TryElideAndTransferToFreeList(sessionFunctions, ref stackCtx, ref srcRecordInfo, - (deleteInfo.UsedValueLength, deleteInfo.FullValueLength, fullRecordLength)); - - if (!isElided) - { - // Leave this in the chain as a normal Tombstone; we aren't going to add a new record so we can't leave this one sealed. - srcRecordInfo.UnsealAndValidate(); - } - else if (!isAdded && RevivificationManager.restoreDeletedRecordsIfBinIsFull) - { - // The record was not added to the freelist, but was elided. See if we can put it back in as a normal Tombstone. Since we just - // elided it and the elision criteria is that it is the only above-BeginAddress record in the chain, and elision sets the - // HashBucketEntry.word to 0, it means we do not expect any records for this key's tag to exist after the elision. Therefore, - // we can re-insert the record iff the HashBucketEntry's address is <= kTempInvalidAddress. - stackCtx.hei = new(stackCtx.hei.hash); - FindOrCreateTag(ref stackCtx.hei, hlogBase.BeginAddress); - - if (stackCtx.hei.entry.Address <= Constants.kTempInvalidAddress && stackCtx.hei.TryCAS(stackCtx.recSrc.LogicalAddress)) - srcRecordInfo.UnsealAndValidate(); - } - } + HandleRecordElision( + sessionFunctions, ref stackCtx, ref srcRecordInfo, deleteInfo.UsedValueLength, deleteInfo.FullValueLength, fullRecordLength); } status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.InPlaceUpdatedRecord); diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 0de862c3d3d..c46fde18899 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -138,19 +138,33 @@ internal OperationStatus InternalRMW(sessionFunctions, ref stackCtx, ref srcRecordInfo)) + { + HandleRecordElision( + sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); + } + + goto LatchRelease; + } + if (OperationStatusUtils.BasicOpCode(status) != OperationStatus.SUCCESS) goto LatchRelease; From f535702ba642fe1d0cd5f9872c006d11df0f3a05 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Wed, 7 May 2025 11:47:00 -0700 Subject: [PATCH 02/20] fmt --- .../Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs b/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs index 6f78b6160b6..81b28f81f91 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Interfaces/CallbackInfos.cs @@ -214,7 +214,7 @@ public struct RMWInfo /// public int FullValueLength { get; internal set; } - public int FullRecordLength { get; internal set; } + public int FullRecordLength { get; internal set; } /// /// If set true by CopyUpdater, the source record for the RCU will not be elided from the tag chain even if this is otherwise possible. From 6d83fb55b879360306865a2e31b49dfa093045dd Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Wed, 7 May 2025 12:09:15 -0700 Subject: [PATCH 03/20] Add pending context linking for audit ability in future --- .../cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index c46fde18899..7cbb09b7744 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -162,6 +162,8 @@ internal OperationStatus InternalRMW Date: Wed, 7 May 2025 15:47:26 -0700 Subject: [PATCH 04/20] more reviv --- .../Storage/Functions/MainStore/RMWMethods.cs | 19 +++----- .../Tsavorite/Implementation/InternalRMW.cs | 32 +++++++++++++- .../Tsavorite/cs/test/RevivificationTests.cs | 3 ++ test/Garnet.test/RespLowMemoryTests.cs | 44 +++++++++++++++++++ 4 files changed, 85 insertions(+), 13 deletions(-) diff --git a/libs/server/Storage/Functions/MainStore/RMWMethods.cs b/libs/server/Storage/Functions/MainStore/RMWMethods.cs index 19117a2d7ce..14f9d7ecf8f 100644 --- a/libs/server/Storage/Functions/MainStore/RMWMethods.cs +++ b/libs/server/Storage/Functions/MainStore/RMWMethods.cs @@ -911,14 +911,16 @@ public bool NeedCopyUpdate(ref SpanByte key, ref RawStringInput input, ref SpanB case RespCommand.DELIFGREATER: if (rmwInfo.RecordInfo.ETag) EtagState.SetValsForRecordWithEtag(ref functionsState.etagState, ref oldValue); + long etagFromClient = input.parseState.GetLong(0); - if (etagFromClient <= functionsState.etagState.etag) + if (etagFromClient > functionsState.etagState.etag) { - EtagState.ResetState(ref functionsState.etagState); - return false; + rmwInfo.Action = RMWAction.ExpireAndStop; } - return true; + EtagState.ResetState(ref functionsState.etagState); + return false; + case RespCommand.SETIFGREATER: case RespCommand.SETIFMATCH: if (rmwInfo.RecordInfo.ETag) @@ -1046,13 +1048,6 @@ public bool CopyUpdater(ref SpanByte key, ref RawStringInput input, ref SpanByte switch (cmd) { - case RespCommand.DELIFGREATER: - // NCU has already checked for making sure the etag is greater than the existing etag by this point - long etagFromClient = input.parseState.GetLong(0); - rmwInfo.Action = RMWAction.ExpireAndStop; - EtagState.ResetState(ref functionsState.etagState); - return false; - case RespCommand.SETIFGREATER: case RespCommand.SETIFMATCH: // By now the comparison for etag against existing etag has already been done in NeedCopyUpdate @@ -1074,7 +1069,7 @@ public bool CopyUpdater(ref SpanByte key, ref RawStringInput input, ref SpanByte Span dest = newValue.AsSpan(EtagConstants.EtagSize); src.CopyTo(dest); - etagFromClient = input.parseState.GetLong(1); + long etagFromClient = input.parseState.GetLong(1); functionsState.etagState.etag = etagFromClient; diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 7cbb09b7744..8d5c2ef37af 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -151,6 +151,8 @@ internal OperationStatus InternalRMW(sessionFunctions, ref stackCtx, ref srcRecordInfo)) + { + (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength) = GetRecordLengths(stackCtx.recSrc.PhysicalAddress, ref value, ref srcRecordInfo); + HandleRecordElision( + sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); + } + } + return OperationStatus.NOTFOUND; + } else return OperationStatus.SUCCESS; } @@ -489,7 +510,7 @@ private OperationStatus CreateNewRecordRMW(sessionFunctions, ref stackCtx, ref newRecordInfo)) + { + (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength) = GetRecordLengths(newPhysicalAddress, ref newRecordValue, ref newRecordInfo); + HandleRecordElision( + sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); + } + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); } else diff --git a/libs/storage/Tsavorite/cs/test/RevivificationTests.cs b/libs/storage/Tsavorite/cs/test/RevivificationTests.cs index 4ec87cdb829..5bc0e2ca39e 100644 --- a/libs/storage/Tsavorite/cs/test/RevivificationTests.cs +++ b/libs/storage/Tsavorite/cs/test/RevivificationTests.cs @@ -460,6 +460,9 @@ internal class RevivificationSpanByteFunctions : SpanByteFunctions internal int expectedSingleFullValueLength = -1; internal int expectedInputLength = InitialLength; + // used to configurably change RMW behavior to test tombstoning via RMW route. + internal bool deleteViaRmw = false; + // This is a queue rather than a single value because there may be calls to, for example, ConcurrentWriter with one length // followed by SingleWriter with another. internal Queue expectedUsedValueLengths = new(); diff --git a/test/Garnet.test/RespLowMemoryTests.cs b/test/Garnet.test/RespLowMemoryTests.cs index f6406f4001a..f8624661e5b 100644 --- a/test/Garnet.test/RespLowMemoryTests.cs +++ b/test/Garnet.test/RespLowMemoryTests.cs @@ -85,5 +85,49 @@ public void PersistCopyUpdateTest() // Verify that key0 exists with correct value ClassicAssert.AreEqual(key0, (string)db.StringGet(key0)); } + + + [Test] + public void RevivificationViaRmw() + { + using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true)); + var db = redis.GetDatabase(0); + var server = redis.GetServer(TestUtils.EndPoint); + var info = TestUtils.GetStoreAddressInfo(server); + + // Start at tail address of 64 + ClassicAssert.AreEqual(64, info.TailAddress); + + var expire = 100; + var key0 = $"key{0:00000}"; + _ = db.StringSet(key0, key0, TimeSpan.FromSeconds(expire)); + + // Record size for key0 is 8 bytes header + 16 bytes key + 16 bytes value + 8 bytes expiry = 48 bytes + // so the new tail address should be 64 + 48 = 112 + // That is, key0 is located at [64, 112) + info = TestUtils.GetStoreAddressInfo(server); + ClassicAssert.AreEqual(112, info.TailAddress); + + // Make the record read-only by adding more records + MakeReadOnly(info.TailAddress, server, db); + + info = TestUtils.GetStoreAddressInfo(server); + var previousTail = info.TailAddress; + + // The first record inserted (key0) is now read-only + ClassicAssert.IsTrue(info.ReadOnlyAddress >= 112); + + // Persist the key, which should cause RMW to CopyUpdate to tail + var response = db.KeyPersist(key0); + ClassicAssert.IsTrue(response); + + // Now key0 is only 40 bytes, as we are removing the expiration + // That is, key0 is now moved to [previousTail, previousTail + 40) + info = TestUtils.GetStoreAddressInfo(server); + ClassicAssert.AreEqual(previousTail + 40, info.TailAddress); + + // Verify that key0 exists with correct value + ClassicAssert.AreEqual(key0, (string)db.StringGet(key0)); + } } } \ No newline at end of file From 115e614db5a7b2497d37ab00d73501d710735e37 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Wed, 7 May 2025 23:34:20 -0700 Subject: [PATCH 05/20] Add tombstoning to via NCU correction --- .../src/core/Allocator/BlittableAllocator.cs | 5 +++ .../core/Allocator/BlittableAllocatorImpl.cs | 4 +++ .../cs/src/core/Allocator/GenericAllocator.cs | 5 +++ .../core/Allocator/GenericAllocatorImpl.cs | 3 ++ .../cs/src/core/Allocator/IAllocator.cs | 3 ++ .../src/core/Allocator/SpanByteAllocator.cs | 2 ++ .../core/Allocator/SpanByteAllocatorImpl.cs | 10 ++++++ .../Tsavorite/Implementation/InternalRMW.cs | 33 ++++++++++++++----- 8 files changed, 56 insertions(+), 9 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs index 2d45b275eda..2fcb1832792 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs @@ -84,6 +84,11 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti where TVariableLengthInput : IVariableLengthInput => BlittableAllocatorImpl.GetRMWCopyDestinationRecordSize(ref key, ref input, ref value, ref recordInfo, varlenInput); + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key) + => BlittableAllocatorImpl.GetTombstoneRecordSize(ref key); + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly int GetRequiredRecordSize(long physicalAddress, int availableBytes) => GetAverageRecordSize(); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocatorImpl.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocatorImpl.cs index d60df042f4d..fdca149f000 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocatorImpl.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocatorImpl.cs @@ -86,6 +86,10 @@ void ReturnPage(int index) internal static (int actualSize, int allocatedSize, int keySize) GetRMWCopyDestinationRecordSize(ref TKey key, ref TInput input, ref TValue value, ref RecordInfo recordInfo, TVariableLengthInput varlenInput) => (RecordSize, RecordSize, KeySize); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref TKey key) + => (RecordSize, RecordSize, KeySize); + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static (int actualSize, int allocatedSize, int keySize) GetRMWInitialRecordSize(ref TKey key, ref TInput input, TSessionFunctionsWrapper sessionFunctions) => (RecordSize, RecordSize, KeySize); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs index 6afdd1b8ed4..0b04394feed 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs @@ -79,6 +79,11 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti where TVariableLengthInput : IVariableLengthInput => _this.GetRMWCopyDestinationRecordSize(ref key, ref input, ref value, ref recordInfo, varlenInput); + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key) + => _this.GetTombstoneRecordSize(ref key); + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly int GetRequiredRecordSize(long physicalAddress, int availableBytes) => GetAverageRecordSize(); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocatorImpl.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocatorImpl.cs index e52fa9d9f9d..05ab2ca74fb 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocatorImpl.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocatorImpl.cs @@ -142,6 +142,9 @@ internal ref TValue GetValue(long physicalAddress) internal (int actualSize, int allocatedSize, int keySize) GetRMWCopyDestinationRecordSize(ref TKey key, ref TInput input, ref TValue value, ref RecordInfo recordInfo, TVariableLengthInput varlenInput) => (RecordSize, RecordSize, KeySize); + internal (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref TKey key) + => (RecordSize, RecordSize, KeySize); + internal int GetAverageRecordSize() => RecordSize; internal int GetFixedRecordSize() => RecordSize; diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs index f7539b4b0b7..97ae0ec8ff7 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs @@ -38,6 +38,9 @@ AllocatorBase GetBase() /// Get record size required for the given and (int actualSize, int allocatedSize, int keySize) GetRecordSize(ref TKey key, ref TValue value); + /// Get the record size for a tombstoned record + (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key); + /// Get the size of the given int GetValueLength(ref TValue value); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs index c2ba87b3795..45a0dc69c62 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs @@ -80,6 +80,8 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti where TVariableLengthInput : IVariableLengthInput => _this.GetRMWCopyDestinationRecordSize(ref key, ref input, ref value, ref recordInfo, varlenInput); + public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref SpanByte key) => _this.GetTombstoneRecordSize(ref key); + /// [MethodImpl(MethodImplOptions.AggressiveInlining)] public readonly int GetRequiredRecordSize(long physicalAddress, int availableBytes) => _this.GetRequiredRecordSize(physicalAddress, availableBytes); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocatorImpl.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocatorImpl.cs index f01495c1fdb..307f0a5bc47 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocatorImpl.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocatorImpl.cs @@ -131,6 +131,16 @@ public ref SpanByte GetAndInitializeValue(long physicalAddress, long endAddress) return (size, RoundUp(size, Constants.kRecordAlignment), keySize); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref SpanByte key) + { + int keySize = key.TotalSize; + // Only metadata space needed since this is going to be used for tombstoning anyway. + int minAllocationForTombstone = sizeof(int); + int size = RecordInfo.GetLength() + RoundUp(keySize, Constants.kRecordAlignment) + minAllocationForTombstone; + return (size, RoundUp(size, Constants.kRecordAlignment), keySize); + } + public int GetRequiredRecordSize(long physicalAddress, int availableBytes) { // We need at least [average record size]... diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 8d5c2ef37af..2ec3036e75b 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. +using System; using System.Diagnostics; using System.Runtime.CompilerServices; @@ -380,6 +381,7 @@ private OperationStatus CreateNewRecordRMW { bool forExpiration = false; + bool skipNCUtombstoning = true; RetryNow: @@ -410,7 +412,13 @@ private OperationStatus CreateNewRecordRMW( sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); } + return OperationStatus.NOTFOUND; } - - return OperationStatus.NOTFOUND; } else return OperationStatus.SUCCESS; @@ -439,9 +446,17 @@ private OperationStatus CreateNewRecordRMW Date: Wed, 7 May 2025 23:36:52 -0700 Subject: [PATCH 06/20] rm test --- test/Garnet.test/RespLowMemoryTests.cs | 44 -------------------------- 1 file changed, 44 deletions(-) diff --git a/test/Garnet.test/RespLowMemoryTests.cs b/test/Garnet.test/RespLowMemoryTests.cs index f8624661e5b..f6406f4001a 100644 --- a/test/Garnet.test/RespLowMemoryTests.cs +++ b/test/Garnet.test/RespLowMemoryTests.cs @@ -85,49 +85,5 @@ public void PersistCopyUpdateTest() // Verify that key0 exists with correct value ClassicAssert.AreEqual(key0, (string)db.StringGet(key0)); } - - - [Test] - public void RevivificationViaRmw() - { - using var redis = ConnectionMultiplexer.Connect(TestUtils.GetConfig(allowAdmin: true)); - var db = redis.GetDatabase(0); - var server = redis.GetServer(TestUtils.EndPoint); - var info = TestUtils.GetStoreAddressInfo(server); - - // Start at tail address of 64 - ClassicAssert.AreEqual(64, info.TailAddress); - - var expire = 100; - var key0 = $"key{0:00000}"; - _ = db.StringSet(key0, key0, TimeSpan.FromSeconds(expire)); - - // Record size for key0 is 8 bytes header + 16 bytes key + 16 bytes value + 8 bytes expiry = 48 bytes - // so the new tail address should be 64 + 48 = 112 - // That is, key0 is located at [64, 112) - info = TestUtils.GetStoreAddressInfo(server); - ClassicAssert.AreEqual(112, info.TailAddress); - - // Make the record read-only by adding more records - MakeReadOnly(info.TailAddress, server, db); - - info = TestUtils.GetStoreAddressInfo(server); - var previousTail = info.TailAddress; - - // The first record inserted (key0) is now read-only - ClassicAssert.IsTrue(info.ReadOnlyAddress >= 112); - - // Persist the key, which should cause RMW to CopyUpdate to tail - var response = db.KeyPersist(key0); - ClassicAssert.IsTrue(response); - - // Now key0 is only 40 bytes, as we are removing the expiration - // That is, key0 is now moved to [previousTail, previousTail + 40) - info = TestUtils.GetStoreAddressInfo(server); - ClassicAssert.AreEqual(previousTail + 40, info.TailAddress); - - // Verify that key0 exists with correct value - ClassicAssert.AreEqual(key0, (string)db.StringGet(key0)); - } } } \ No newline at end of file From 39b40920c6cca2dc88a0cbe28b72717bbe516431 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Wed, 7 May 2025 23:42:33 -0700 Subject: [PATCH 07/20] fmt --- .../cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 2ec3036e75b..f6d931e36d2 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -1,7 +1,6 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT license. -using System; using System.Diagnostics; using System.Runtime.CompilerServices; From d3411f285ebe9f83298fc22b9d5715295c6b542a Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Wed, 7 May 2025 23:44:32 -0700 Subject: [PATCH 08/20] fmt --- .../storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs index 45a0dc69c62..df62d6e00b1 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs @@ -80,6 +80,7 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti where TVariableLengthInput : IVariableLengthInput => _this.GetRMWCopyDestinationRecordSize(ref key, ref input, ref value, ref recordInfo, varlenInput); + /// public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref SpanByte key) => _this.GetTombstoneRecordSize(ref key); /// From 8dc24683f9522f0ae43772cbfd3038e9caaf6451 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Thu, 8 May 2025 11:24:00 -0700 Subject: [PATCH 09/20] fix status --- .../Storage/Functions/MainStore/RMWMethods.cs | 3 ++ .../src/core/Allocator/BlittableAllocator.cs | 2 +- .../cs/src/core/Allocator/GenericAllocator.cs | 2 +- .../cs/src/core/Allocator/IAllocator.cs | 2 +- .../src/core/Allocator/SpanByteAllocator.cs | 2 +- .../Tsavorite/Implementation/InternalRMW.cs | 34 ++++++++++++------- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/libs/server/Storage/Functions/MainStore/RMWMethods.cs b/libs/server/Storage/Functions/MainStore/RMWMethods.cs index 14f9d7ecf8f..701866558bd 100644 --- a/libs/server/Storage/Functions/MainStore/RMWMethods.cs +++ b/libs/server/Storage/Functions/MainStore/RMWMethods.cs @@ -919,6 +919,9 @@ public bool NeedCopyUpdate(ref SpanByte key, ref RawStringInput input, ref SpanB } EtagState.ResetState(ref functionsState.etagState); + // We always return false because we would rather not create a new record in hybrid log if we don't need to delete the object. + // Setting no Action and returning false for non-delete case will shortcircuit the InternalRMW code to not run CU, and return SUCCESS. + // If we want to delete the object setting the Action to ExpireAndStop will add the tombstone in hybrid log for us. return false; case RespCommand.SETIFGREATER: diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs index 2fcb1832792..54f48093764 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/BlittableAllocator.cs @@ -86,7 +86,7 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key) + public (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref TKey key) => BlittableAllocatorImpl.GetTombstoneRecordSize(ref key); /// diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs index 0b04394feed..bc14667d390 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/GenericAllocator.cs @@ -81,7 +81,7 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key) + public (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref TKey key) => _this.GetTombstoneRecordSize(ref key); /// diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs index 97ae0ec8ff7..967e115ac6c 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/IAllocator.cs @@ -39,7 +39,7 @@ AllocatorBase GetBase() (int actualSize, int allocatedSize, int keySize) GetRecordSize(ref TKey key, ref TValue value); /// Get the record size for a tombstoned record - (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref TKey key); + (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref TKey key); /// Get the size of the given int GetValueLength(ref TValue value); diff --git a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs index df62d6e00b1..4949003aae2 100644 --- a/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs +++ b/libs/storage/Tsavorite/cs/src/core/Allocator/SpanByteAllocator.cs @@ -81,7 +81,7 @@ public readonly (int actualSize, int allocatedSize, int keySize) GetRMWCopyDesti => _this.GetRMWCopyDestinationRecordSize(ref key, ref input, ref value, ref recordInfo, varlenInput); /// - public (int actualSize, int allocatedSize, int keySize) GetTombtoneRecordSize(ref SpanByte key) => _this.GetTombstoneRecordSize(ref key); + public (int actualSize, int allocatedSize, int keySize) GetTombstoneRecordSize(ref SpanByte key) => _this.GetTombstoneRecordSize(ref key); /// [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index f6d931e36d2..d25d444bc2a 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -380,7 +380,7 @@ private OperationStatus CreateNewRecordRMW { bool forExpiration = false; - bool skipNCUtombstoning = true; + bool skipTombstoneAddition = true; RetryNow: @@ -413,9 +413,9 @@ private OperationStatus CreateNewRecordRMW( - sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); + sessionFunctions, ref stackCtx, ref newRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); } - - status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); + + // retain status set by NCU or CU if tombstone addition was requested, else override it what should have been after PCU + if (skipTombstoneAddition) + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); } else { From 8180d1c4c5033dcbe60fa53b271400eb44719c20 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Thu, 8 May 2025 11:40:15 -0700 Subject: [PATCH 10/20] FMT --- .../src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index d25d444bc2a..379ffc3ef78 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -556,10 +556,10 @@ private OperationStatus CreateNewRecordRMW( sessionFunctions, ref stackCtx, ref newRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); } - + // retain status set by NCU or CU if tombstone addition was requested, else override it what should have been after PCU if (skipTombstoneAddition) status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); From 41da456dd66139b861ca5c0466bb3fdbd9f5a08c Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Thu, 8 May 2025 22:24:20 -0700 Subject: [PATCH 11/20] meow --- .../Tsavorite/Implementation/InternalRMW.cs | 46 ++---- .../Tsavorite/cs/test/RevivificationTests.cs | 136 +++++++++++++++++- 2 files changed, 142 insertions(+), 40 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 379ffc3ef78..bda9a8b6422 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -152,6 +152,7 @@ internal OperationStatus InternalRMW(sessionFunctions, ref stackCtx, ref srcRecordInfo)) - { - (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength) = GetRecordLengths(stackCtx.recSrc.PhysicalAddress, ref value, ref srcRecordInfo); - HandleRecordElision( - sessionFunctions, ref stackCtx, ref srcRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); - } - return OperationStatus.NOTFOUND; - } + // since past record is in stable region, we need to explicitly add a new tombstone record + // action is already set to expire and stop, by simply changing the allocation based on this, we can let record splicing, elision, and potential revivification continue + skipTombstoneAddition = false; } else return OperationStatus.SUCCESS; @@ -524,8 +506,8 @@ private OperationStatus CreateNewRecordRMW(sessionFunctions, ref stackCtx, ref newRecordInfo)) - { - (rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength) = GetRecordLengths(newPhysicalAddress, ref newRecordValue, ref newRecordInfo); - HandleRecordElision( - sessionFunctions, ref stackCtx, ref newRecordInfo, rmwInfo.UsedValueLength, rmwInfo.FullValueLength, rmwInfo.FullRecordLength); - } - - // retain status set by NCU or CU if tombstone addition was requested, else override it what should have been after PCU - if (skipTombstoneAddition) - status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); + status = OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.CopyUpdatedRecord | StatusCode.Expired); } else { diff --git a/libs/storage/Tsavorite/cs/test/RevivificationTests.cs b/libs/storage/Tsavorite/cs/test/RevivificationTests.cs index 5bc0e2ca39e..542ec5caab6 100644 --- a/libs/storage/Tsavorite/cs/test/RevivificationTests.cs +++ b/libs/storage/Tsavorite/cs/test/RevivificationTests.cs @@ -461,7 +461,9 @@ internal class RevivificationSpanByteFunctions : SpanByteFunctions internal int expectedInputLength = InitialLength; // used to configurably change RMW behavior to test tombstoning via RMW route. - internal bool deleteViaRmw = false; + internal bool deleteInIpu = false; + internal bool deleteInNCU = false; + internal bool deleteInCU = false; // This is a queue rather than a single value because there may be calls to, for example, ConcurrentWriter with one length // followed by SingleWriter with another. @@ -539,9 +541,28 @@ public override bool InitialUpdater(ref SpanByte key, ref SpanByte input, ref Sp return base.InitialUpdater(ref key, ref input, ref value, ref output, ref rmwInfo, ref recordInfo); } + public override bool NeedCopyUpdate(ref SpanByte key, ref SpanByte input, ref SpanByte oldValue, ref SpanByteAndMemory output, ref RMWInfo rmwInfo) + { + if (deleteInNCU) + { + rmwInfo.Action = RMWAction.ExpireAndStop; + return false; + } + + return base.NeedCopyUpdate(ref key, ref input, ref oldValue, ref output, ref rmwInfo); + } + public override bool CopyUpdater(ref SpanByte key, ref SpanByte input, ref SpanByte oldValue, ref SpanByte newValue, ref SpanByteAndMemory output, ref RMWInfo rmwInfo, ref RecordInfo recordInfo) { + if (deleteInCU) + { + rmwInfo.Action = RMWAction.ExpireAndStop; + return false; + } + AssertInfoValid(ref rmwInfo); + + ClassicAssert.AreEqual(expectedInputLength, input.Length); var expectedUsedValueLength = expectedUsedValueLengths.Dequeue(); @@ -564,6 +585,13 @@ public override bool CopyUpdater(ref SpanByte key, ref SpanByte input, ref SpanB public override bool InPlaceUpdater(ref SpanByte key, ref SpanByte input, ref SpanByte value, ref SpanByteAndMemory output, ref RMWInfo rmwInfo, ref RecordInfo recordInfo) { AssertInfoValid(ref rmwInfo); + + if (deleteInIpu) + { + rmwInfo.Action = RMWAction.ExpireAndStop; + return false; + } + ClassicAssert.AreEqual(expectedInputLength, input.Length); ClassicAssert.AreEqual(expectedConcurrentDestLength, value.Length); ClassicAssert.AreEqual(expectedConcurrentFullValueLength, rmwInfo.FullValueLength); @@ -816,10 +844,57 @@ public void SpanByteNoRevivLengthTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] Up } } + internal enum DeletionRoutes + { + DELETE, + RMW_IPU, + RMW_NCU, + RMW_CU + } + + private Status DeleteViaRMW(ref SpanByte key, Span mockInputVec, byte fillByte) + { + var mockInput = SpanByte.FromPinnedSpan(mockInputVec); + mockInputVec.Fill(fillByte); + return bContext.RMW(ref key, ref mockInput); + } + + private Status PerformDeletion(DeletionRoutes deletionRoute, ref SpanByte key, byte fillByte) + { + Status status; + switch (deletionRoute) + { + case DeletionRoutes.DELETE: + return bContext.Delete(ref key); + case DeletionRoutes.RMW_IPU: + functions.deleteInIpu = true; + Span mockInputVec = stackalloc byte[InitialLength]; + status = DeleteViaRMW(ref key, mockInputVec, fillByte); + functions.deleteInIpu = false; + break; + case DeletionRoutes.RMW_NCU: + functions.deleteInNCU = true; + mockInputVec = stackalloc byte[InitialLength]; + status = DeleteViaRMW(ref key, mockInputVec, fillByte); + functions.deleteInNCU = false; + break; + case DeletionRoutes.RMW_CU: + functions.deleteInCU = true; + mockInputVec = stackalloc byte[InitialLength]; + status = DeleteViaRMW(ref key, mockInputVec, fillByte); + functions.deleteInCU = false; + break; + default: + throw new Exception("Unhandled deletion logic"); + } + + return status; + } + [Test] [Category(RevivificationCategory)] [Category(SmokeTestCategory)] - public void SpanByteSimpleTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp) + public void SpanByteSimpleTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp, [Values(DeletionRoutes.DELETE, DeletionRoutes.RMW_IPU)] DeletionRoutes deletionRoute) { Populate(); @@ -831,7 +906,9 @@ public void SpanByteSimpleTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp var key = SpanByte.FromPinnedSpan(keyVec); functions.expectedUsedValueLengths.Enqueue(SpanByteTotalSize(InitialLength)); - var status = bContext.Delete(ref key); + + Status status = PerformDeletion(deletionRoute, ref key, fillByte); + ClassicAssert.IsTrue(status.Found, status.ToString()); ClassicAssert.AreEqual(tailAddress, store.Log.TailAddress); @@ -854,6 +931,59 @@ public void SpanByteSimpleTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp ClassicAssert.AreEqual(tailAddress, store.Log.TailAddress); } + /* + * Create a record and tombstone it at RCU or NCU layer + * */ + [Test] + [Category(RevivificationCategory)] + [Category(SmokeTestCategory)] + public void SpanByteDeletionViaRMWRevivifiesOriginalRecordAfterTombstoning( + [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp, [Values(DeletionRoutes.RMW_NCU, DeletionRoutes.RMW_CU)] DeletionRoutes deletionRoute) + { + + // To make the old record revivifiable via the RCU route. I need to be able to do an RMW where a mutable region record goes down CPR route. + Populate(); + + + Span keyVec = stackalloc byte[KeyLength]; + byte fillByte = 42; + keyVec.Fill(fillByte); + var key = SpanByte.FromPinnedSpan(keyVec); + + functions.expectedUsedValueLengths.Enqueue(SpanByteTotalSize(InitialLength)); + + // TODO: do some fuckery to force RMW to go via RCU via CPR route + var status = PerformDeletion(deletionRoute, ref key, fillByte); + + RevivificationTestUtils.WaitForRecords(store, want: true); + + ClassicAssert.AreEqual(1, RevivificationTestUtils.GetFreeRecordCount(store)); + + ClassicAssert.IsTrue(status.Found, status.ToString()); + + var tailAddress = store.Log.TailAddress; + + Span inputVec = stackalloc byte[InitialLength]; + var input = SpanByte.FromPinnedSpan(inputVec); + inputVec.Fill(fillByte); + + SpanByteAndMemory output = new(); + + functions.expectedInputLength = InitialLength; + functions.expectedSingleDestLength = InitialLength; + functions.expectedConcurrentDestLength = InitialLength; + functions.expectedSingleFullValueLength = functions.expectedConcurrentFullValueLength = RoundUpSpanByteFullValueLength(input); + functions.expectedUsedValueLengths.Enqueue(SpanByteTotalSize(InitialLength)); + + _ = updateOp == UpdateOp.Upsert ? bContext.Upsert(ref key, ref input, ref input, ref output) : bContext.RMW(ref key, ref input); + + // since above would use revivification free list we should see no change of tail address. + ClassicAssert.AreEqual(store.Log.TailAddress, tailAddress); + ClassicAssert.AreEqual(tailAddress, store.Log.TailAddress); + ClassicAssert.AreEqual(0, RevivificationTestUtils.GetFreeRecordCount(store)); + output.Memory?.Dispose(); + } + [Test] [Category(RevivificationCategory)] [Category(SmokeTestCategory)] From 150fbbbaf98255a111447a1458b1a0920d96ccfe Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Thu, 8 May 2025 23:00:52 -0700 Subject: [PATCH 12/20] fix comment --- .../cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index bda9a8b6422..bf150a0e1dc 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -409,9 +409,8 @@ private OperationStatus CreateNewRecordRMW Date: Thu, 8 May 2025 23:49:09 -0700 Subject: [PATCH 13/20] coded tests --- .../Tsavorite/Implementation/InternalRMW.cs | 3 +-- .../Tsavorite/cs/test/RevivificationTests.cs | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index bf150a0e1dc..c189e907c52 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -201,7 +201,6 @@ internal OperationStatus InternalRMW internal bool deleteInIpu = false; internal bool deleteInNCU = false; internal bool deleteInCU = false; + internal bool forceSkipIpu = false; // This is a queue rather than a single value because there may be calls to, for example, ConcurrentWriter with one length // followed by SingleWriter with another. @@ -586,6 +587,9 @@ public override bool InPlaceUpdater(ref SpanByte key, ref SpanByte input, ref Sp { AssertInfoValid(ref rmwInfo); + if (forceSkipIpu) + return false; + if (deleteInIpu) { rmwInfo.Action = RMWAction.ExpireAndStop; @@ -931,20 +935,14 @@ public void SpanByteSimpleTest([Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp ClassicAssert.AreEqual(tailAddress, store.Log.TailAddress); } - /* - * Create a record and tombstone it at RCU or NCU layer - * */ [Test] [Category(RevivificationCategory)] [Category(SmokeTestCategory)] - public void SpanByteDeletionViaRMWRevivifiesOriginalRecordAfterTombstoning( + public void SpanByteDeletionViaRMWRCURevivifiesOriginalRecordAfterTombstoning( [Values(UpdateOp.Upsert, UpdateOp.RMW)] UpdateOp updateOp, [Values(DeletionRoutes.RMW_NCU, DeletionRoutes.RMW_CU)] DeletionRoutes deletionRoute) { - - // To make the old record revivifiable via the RCU route. I need to be able to do an RMW where a mutable region record goes down CPR route. Populate(); - Span keyVec = stackalloc byte[KeyLength]; byte fillByte = 42; keyVec.Fill(fillByte); @@ -952,8 +950,9 @@ public void SpanByteDeletionViaRMWRevivifiesOriginalRecordAfterTombstoning( functions.expectedUsedValueLengths.Enqueue(SpanByteTotalSize(InitialLength)); - // TODO: do some fuckery to force RMW to go via RCU via CPR route + functions.forceSkipIpu = true; var status = PerformDeletion(deletionRoute, ref key, fillByte); + functions.forceSkipIpu = false; RevivificationTestUtils.WaitForRecords(store, want: true); @@ -975,6 +974,12 @@ public void SpanByteDeletionViaRMWRevivifiesOriginalRecordAfterTombstoning( functions.expectedSingleFullValueLength = functions.expectedConcurrentFullValueLength = RoundUpSpanByteFullValueLength(input); functions.expectedUsedValueLengths.Enqueue(SpanByteTotalSize(InitialLength)); + // brand new value so we try to use a record out of free list + keyVec = stackalloc byte[KeyLength]; + fillByte = 255; + keyVec.Fill(fillByte); + key = SpanByte.FromPinnedSpan(keyVec); + _ = updateOp == UpdateOp.Upsert ? bContext.Upsert(ref key, ref input, ref input, ref output) : bContext.RMW(ref key, ref input); // since above would use revivification free list we should see no change of tail address. From 5e40387adca022de372bfa3a700712deaf605048 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Thu, 8 May 2025 23:53:37 -0700 Subject: [PATCH 14/20] update comments --- .../cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index c189e907c52..60f6ad9a334 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -535,7 +535,7 @@ private OperationStatus CreateNewRecordRMW Date: Fri, 9 May 2025 00:00:10 -0700 Subject: [PATCH 15/20] fix dirty and modified setting --- .../src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 60f6ad9a334..7d79815847a 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -409,7 +409,6 @@ private OperationStatus CreateNewRecordRMW Date: Fri, 9 May 2025 12:24:42 -0700 Subject: [PATCH 16/20] WIP --- .../Tsavorite/Implementation/InternalRMW.cs | 37 ++++++++++++++----- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 7d79815847a..f0272c06a26 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -393,6 +393,12 @@ private OperationStatus CreateNewRecordRMW(sessionFunctions, ref stackCtx, ref srcRecordInfo) + }; + // Perform Need* if (doingCU) { @@ -408,7 +414,18 @@ private OperationStatus CreateNewRecordRMW( + sessionFunctions, ref stackCtx, ref srcRecordInfo, oldRecordLengths.usedValueLength, oldRecordLengths.fullValueLength, oldRecordLengths.fullRecordLength); + // no new record created and hash entry is empty now + return OperationStatusUtils.AdvancedOpCode(OperationStatus.SUCCESS, StatusCode.Found | StatusCode.Expired); + } + // otherwise we shall continue down the tombstoning path skipTombstoneAddition = false; } else @@ -431,17 +448,13 @@ private OperationStatus CreateNewRecordRMW(sessionFunctions, ref stackCtx, ref srcRecordInfo) - }; - if (!TryAllocateRecord(sessionFunctions, ref pendingContext, ref stackCtx, actualSize, ref allocatedSize, keySize, allocOptions, out long newLogicalAddress, out long newPhysicalAddress, out OperationStatus status)) return status; @@ -503,6 +516,8 @@ private OperationStatus CreateNewRecordRMW Date: Fri, 9 May 2025 13:13:36 -0700 Subject: [PATCH 17/20] FMT --- .../src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index f0272c06a26..261c8e269ab 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -417,7 +417,7 @@ private OperationStatus CreateNewRecordRMW( @@ -448,7 +448,7 @@ private OperationStatus CreateNewRecordRMW Date: Fri, 9 May 2025 16:04:14 -0700 Subject: [PATCH 18/20] feedback --- .../cs/src/core/Index/Tsavorite/Implementation/Helpers.cs | 4 +++- .../src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 5 ----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs index 52dbfd5cb49..05b6dd99952 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/Helpers.cs @@ -280,7 +280,9 @@ private void HandleRecordElision(sessionFunctions, ref stackCtx, ref srcRecordInfo, (usedValueLength, fullValueLength, fullRecordLength)); diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index 261c8e269ab..b51fcd687fb 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -151,7 +151,6 @@ internal OperationStatus InternalRMW( @@ -557,8 +554,6 @@ private OperationStatus CreateNewRecordRMW Date: Fri, 9 May 2025 16:45:34 -0700 Subject: [PATCH 19/20] tomtone --- .../cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index b51fcd687fb..cda3ea23a67 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -415,6 +415,7 @@ private OperationStatus CreateNewRecordRMW( From 130015c9f9a8c45328ae6566ee8be11ee6e3bc99 Mon Sep 17 00:00:00 2001 From: Hamdaan Khalid Date: Sat, 10 May 2025 11:05:30 -0700 Subject: [PATCH 20/20] fix feedback --- .../Tsavorite/Implementation/InternalRMW.cs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs index cda3ea23a67..72c47819d7a 100644 --- a/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs +++ b/libs/storage/Tsavorite/cs/src/core/Index/Tsavorite/Implementation/InternalRMW.cs @@ -379,7 +379,7 @@ private OperationStatus CreateNewRecordRMW { bool forExpiration = false; - bool skipTombstoneAddition = true; + bool addTombstone = false; RetryNow: @@ -416,6 +416,7 @@ private OperationStatus CreateNewRecordRMW( @@ -424,7 +425,7 @@ private OperationStatus CreateNewRecordRMW