diff --git a/Microsoft.Azure.Cosmos/src/Routing/CollectionCache.cs b/Microsoft.Azure.Cosmos/src/Routing/CollectionCache.cs index caf490a8bb..ea5f6f3826 100644 --- a/Microsoft.Azure.Cosmos/src/Routing/CollectionCache.cs +++ b/Microsoft.Azure.Cosmos/src/Routing/CollectionCache.cs @@ -150,8 +150,18 @@ public virtual async Task ResolveCollectionAsync( return collectionInfo; } - if (request.RequestContext.ResolvedCollectionRid == null) + if (request.RequestContext.ResolvedCollectionRid == null + || !CollectionCache.IsCollectionRid(request.RequestContext.ResolvedCollectionRid)) { + if (request.RequestContext.ResolvedCollectionRid != null) + { + DefaultTrace.TraceWarning( + "ResolvedCollectionRid '{0}' for resource '{1}' is not a collection RID; falling back to name-based resolution. ActivityId: '{2}'", + request.RequestContext.ResolvedCollectionRid, + request.ResourceAddress, + System.Diagnostics.Trace.CorrelationManager.ActivityId); + } + collectionInfo = await this.ResolveByNameAsync( apiVersion: request.Headers[HttpConstants.HttpHeaders.Version], @@ -169,6 +179,15 @@ await this.ResolveByNameAsync( collectionInfo.ResourceId, System.Diagnostics.Trace.CorrelationManager.ActivityId); + if (!CollectionCache.IsCollectionRid(collectionInfo.ResourceId)) + { + throw new InvalidOperationException( + $"Resolved resource '{request.ResourceAddress}' has a non-collection ResourceId " + + $"'{collectionInfo.ResourceId}'. This indicates the server returned a database or " + + $"other resource RID instead of a collection RID. ActivityId: " + + $"'{System.Diagnostics.Trace.CorrelationManager.ActivityId}'."); + } + request.ResourceId = collectionInfo.ResourceId; request.RequestContext.ResolvedCollectionRid = collectionInfo.ResourceId; } @@ -358,6 +377,24 @@ protected InternalCache GetCache(string apiVersion) return this.cacheByApiList[0]; } + internal static bool IsCollectionRid(string resourceId) + { + if (string.IsNullOrWhiteSpace(resourceId) || + !ResourceId.TryParse(resourceId, out ResourceId resourceIdParsed)) + { + return false; + } + + string databaseRid = resourceIdParsed.DatabaseId.ToString(); + if (StringComparer.Ordinal.Equals(databaseRid, resourceId)) + { + return false; + } + + string collectionRid = resourceIdParsed.DocumentCollectionId.ToString(); + return StringComparer.Ordinal.Equals(collectionRid, resourceId); + } + private sealed class CollectionRidComparer : IEqualityComparer { public bool Equals(ContainerProperties left, ContainerProperties right) @@ -382,4 +419,4 @@ public int GetHashCode(ContainerProperties collection) } } -} \ No newline at end of file +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CollectionCacheEmulatorTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CollectionCacheEmulatorTests.cs new file mode 100644 index 0000000000..b1102a3408 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.EmulatorTests/CollectionCacheEmulatorTests.cs @@ -0,0 +1,210 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.SDK.EmulatorTests +{ + using System; + using System.Net; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Common; + using Microsoft.Azure.Cosmos.Routing; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Documents; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Emulator tests that exercise behaviour + /// directly, so that edge-case paths (e.g. a database RID leaking into + /// ResolvedCollectionRid) can be verified end-to-end against the real + /// Cosmos emulator. + /// + [TestClass] + public class CollectionCacheEmulatorTests + { + private CosmosClient cosmosClient; + private Cosmos.Database database; + private Container container; + private ContainerProperties containerProperties; + + [TestInitialize] + public async Task TestInitialize() + { + this.cosmosClient = TestCommon.CreateCosmosClient(); + this.database = await this.cosmosClient.CreateDatabaseAsync(Guid.NewGuid().ToString()); + this.containerProperties = new ContainerProperties( + id: Guid.NewGuid().ToString(), + partitionKeyPath: "/pk"); + ContainerResponse containerResponse = await this.database.CreateContainerAsync(this.containerProperties); + this.container = containerResponse.Container; + } + + [TestCleanup] + public async Task TestCleanup() + { + if (this.database != null) + { + await this.database.DeleteStreamAsync(); + } + + this.cosmosClient?.Dispose(); + } + + /// + /// Repro for the scenario where ResolvedCollectionRid is set to a + /// database-level RID instead of a collection-level RID. + /// + /// SCENARIO + /// -------- + /// In some code paths (e.g. via + /// or a stale cache after container delete+recreate) the + /// DocumentServiceRequest.RequestContext.ResolvedCollectionRid field can + /// end up holding the database RID (4-byte form, e.g. "jy2ekg==") rather than + /// a proper collection RID (8-byte form, e.g. "jy2eklxnboe="). When that + /// happens, the subsequent RID-based cache look-up fails or routes to the + /// wrong container. + /// + /// FIX + /// --- + /// now: + /// 1. Logs a TraceWarning when it detects that the pre-existing + /// ResolvedCollectionRid is not a collection RID. + /// 2. Falls back to name-based resolution in that case. + /// 3. Throws at the assignment site + /// if the resolved ResourceId is still not a collection RID (guards against + /// a corrupt response from the server). + /// + /// TEST + /// ---- + /// This test injects the real database RID into + /// request.RequestContext.ResolvedCollectionRid to simulate the bug, + /// then verifies that + /// still returns the correct (with the + /// proper collection RID) via the name-based fallback. + /// + [TestMethod] + public async Task ResolveCollectionAsync_WithDatabaseRidInResolvedCollectionRid_FallsBackToNameResolutionReproTest() + { + // Arrange: get the real database RID from the emulator. + DatabaseResponse databaseResponse = await this.database.ReadAsync(); + string databaseRid = databaseResponse.Resource.ResourceId; + Assert.IsNotNull(databaseRid, "Database ResourceId must not be null"); + + // Sanity-check: the database RID must NOT be a collection RID. + // (A database RID is 4 bytes base64-encoded; a collection RID is 8 bytes.) + Assert.IsFalse( + IsCollectionRid(databaseRid), + $"Expected '{databaseRid}' to be a database RID, not a collection RID."); + + // Get the container's actual collection RID so we can assert against it. + ContainerResponse containerResponse = await this.container.ReadContainerAsync(); + string expectedCollectionRid = containerResponse.Resource.ResourceId; + Assert.IsNotNull(expectedCollectionRid, "Container ResourceId must not be null"); + Assert.IsTrue( + IsCollectionRid(expectedCollectionRid), + $"Expected '{expectedCollectionRid}' to be a valid collection RID."); + + // Build a name-based DocumentServiceRequest that simulates a read on a + // document inside the container so the request goes through the + // IsNameBased path of CollectionCache.ResolveCollectionAsync. + string documentPath = $"dbs/{this.database.Id}/colls/{this.container.Id}/docs/someDoc"; + using DocumentServiceRequest request = DocumentServiceRequest.CreateFromName( + OperationType.Read, + documentPath, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey, + null); + + // Simulate the bug: set ResolvedCollectionRid to the database RID. + request.RequestContext.ResolvedCollectionRid = databaseRid; + + // Act: call the collection cache directly. + ClientCollectionCache collectionCache = + await this.cosmosClient.DocumentClient.GetCollectionCacheAsync(NoOpTrace.Singleton); + + ContainerProperties resolved = await collectionCache.ResolveCollectionAsync( + request, + CancellationToken.None, + NoOpTrace.Singleton); + + // Assert: the cache must have fallen back to name-based resolution and + // returned the correct collection RID — not the database RID. + Assert.IsNotNull(resolved, "ResolveCollectionAsync must return a non-null ContainerProperties"); + Assert.AreEqual( + expectedCollectionRid, + resolved.ResourceId, + $"Expected collection RID '{expectedCollectionRid}' but got '{resolved.ResourceId}'. " + + "The cache should have fallen back to name-based resolution and returned the correct collection RID."); + + Assert.IsTrue( + IsCollectionRid(resolved.ResourceId), + $"Resolved ResourceId '{resolved.ResourceId}' must be a collection RID, not a database RID."); + + Assert.AreNotEqual( + databaseRid, + resolved.ResourceId, + "Resolved ResourceId must not be the database RID that was injected."); + } + + /// + /// Verifies that end-to-end item operations (read/write) still succeed on a + /// container whose collection cache was primed with a database RID. This + /// tests the full SDK retry stack rather than just the cache layer. + /// + [TestMethod] + public async Task ItemRead_AfterDatabaseRidInjectedIntoCollectionCache_Succeeds() + { + // Create a test item so we have something to read back. + string pk = Guid.NewGuid().ToString("N"); + string id = Guid.NewGuid().ToString("N"); + var testItem = new { id, pk }; + await this.container.CreateItemAsync(testItem, new Cosmos.PartitionKey(pk)); + + // Get the database RID from the emulator. + DatabaseResponse databaseResponse = await this.database.ReadAsync(); + string databaseRid = databaseResponse.Resource.ResourceId; + + // Warm up the collection cache so that subsequent reads use the cache + // hit path; this ensures the cache is initialised before we corrupt it. + ContainerResponse containerResponse = await this.container.ReadContainerAsync(); + string expectedCollectionRid = containerResponse.Resource.ResourceId; + + // Build the request and inject the database RID as ResolvedCollectionRid. + string documentPath = $"dbs/{this.database.Id}/colls/{this.container.Id}/docs/{id}"; + using DocumentServiceRequest request = DocumentServiceRequest.CreateFromName( + OperationType.Read, + documentPath, + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey, + null); + + request.RequestContext.ResolvedCollectionRid = databaseRid; + + ClientCollectionCache collectionCache = + await this.cosmosClient.DocumentClient.GetCollectionCacheAsync(NoOpTrace.Singleton); + + // The cache must resolve to the correct collection RID via name-based fallback. + ContainerProperties resolved = await collectionCache.ResolveCollectionAsync( + request, + CancellationToken.None, + NoOpTrace.Singleton); + + Assert.AreEqual(expectedCollectionRid, resolved.ResourceId); + + // The real item read through the full SDK stack must also succeed. + ItemResponse itemResponse = await this.container.ReadItemAsync( + id, + new Cosmos.PartitionKey(pk)); + + Assert.AreEqual(HttpStatusCode.OK, itemResponse.StatusCode); + } + + /// + /// Delegates to which is now + /// internal, so tests in this assembly can use it directly. + /// + private static bool IsCollectionRid(string resourceId) + => CollectionCache.IsCollectionRid(resourceId); + } +} diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CollectionCacheTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CollectionCacheTests.cs new file mode 100644 index 0000000000..90e945c13a --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/CollectionCacheTests.cs @@ -0,0 +1,152 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests +{ + using System; + using System.Threading; + using System.Threading.Tasks; + using Microsoft.Azure.Cosmos.Common; + using Microsoft.Azure.Cosmos.Core.Trace; + using Microsoft.Azure.Cosmos.Tracing; + using Microsoft.Azure.Documents; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + [TestClass] + public class CollectionCacheTests + { + // A real collection RID from the existing test suite (same value used in FeedRangeTests + // and CosmosBadReplicaTests). ResourceId.Parse accepts this value, so it exercises the + // actual binary round-trip through the Direct-package parser. + private const string CollectionRid = "ccZ1ANCszwk="; + + [TestMethod] + public async Task ResolveCollectionAsync_WithDatabaseRidInResolvedCollectionRid_FallsBackToNameResolution() + { + // Derive a real database RID from the known collection RID so that + // ResourceId.TryParse recognises it as a valid (but database-level) RID. + string databaseRid = ResourceId.Parse(CollectionRid).DatabaseId.ToString(); + + TestCollectionCache cache = new TestCollectionCache(CollectionRid); + using DocumentServiceRequest request = DocumentServiceRequest.CreateFromName( + OperationType.Read, + "dbs/db1/colls/c1/docs/d1", + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey, + null); + request.RequestContext.ResolvedCollectionRid = databaseRid; + + ContainerProperties resolved = await cache.ResolveCollectionAsync( + request, + CancellationToken.None, + NoOpTrace.Singleton); + + Assert.AreEqual(CollectionRid, resolved.ResourceId); + Assert.AreEqual(1, cache.NameLookupCount); + Assert.AreEqual(0, cache.RidLookupCount); + } + + [TestMethod] + public async Task ResolveCollectionAsync_WithCollectionRidInResolvedCollectionRid_UsesRidResolution() + { + TestCollectionCache cache = new TestCollectionCache(CollectionRid); + using DocumentServiceRequest request = DocumentServiceRequest.CreateFromName( + OperationType.Read, + "dbs/db1/colls/c1/docs/d1", + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey, + null); + request.RequestContext.ResolvedCollectionRid = CollectionRid; + + ContainerProperties resolved = await cache.ResolveCollectionAsync( + request, + CancellationToken.None, + NoOpTrace.Singleton); + + Assert.AreEqual(CollectionRid, resolved.ResourceId); + Assert.AreEqual(0, cache.NameLookupCount); + Assert.AreEqual(1, cache.RidLookupCount); + } + + /// + /// Assignment guard: when the name-based lookup returns a container whose ResourceId is + /// a database-level RID (i.e. the server sent a corrupt response), CollectionCache must + /// throw before persisting the bad value onto + /// request.RequestContext.ResolvedCollectionRid. + /// + /// On the msdata/direct branch an additional first line of defence exists: + /// the setter itself + /// emits a TraceWarning (with the call stack) whenever a non-collection RID is + /// assigned, so the exact call site can be identified in production logs even before the + /// error surfaces in . + /// + [TestMethod] + public async Task ResolveCollectionAsync_WhenNameResolutionReturnsDatabaseRid_ThrowsInvalidOperation() + { + // Use a real database RID derived from a known collection RID so that + // ResourceId.TryParse recognises it as a database-level RID. + string databaseRid = ResourceId.Parse(CollectionRid).DatabaseId.ToString(); + + // The mock returns a container whose ResourceId is a database RID — simulating + // a corrupt server response or a stale in-process cache entry. + TestCollectionCache cache = new TestCollectionCache(returnRid: databaseRid); + using DocumentServiceRequest request = DocumentServiceRequest.CreateFromName( + OperationType.Read, + "dbs/db1/colls/c1/docs/d1", + ResourceType.Document, + AuthorizationTokenType.PrimaryMasterKey, + null); + + await Assert.ThrowsExceptionAsync( + () => cache.ResolveCollectionAsync( + request, + CancellationToken.None, + NoOpTrace.Singleton)); + } + + private sealed class TestCollectionCache : CollectionCache + { + private readonly string returnRid; + + public TestCollectionCache(string returnRid) + : base(enableAsyncCacheExceptionNoSharing: false) + { + this.returnRid = returnRid; + } + + public int NameLookupCount { get; private set; } + + public int RidLookupCount { get; private set; } + + protected override Task GetByRidAsync( + string apiVersion, + string collectionRid, + ITrace trace, + IClientSideRequestStatistics clientSideRequestStatistics, + CancellationToken cancellationToken) + { + this.RidLookupCount++; + if (StringComparer.Ordinal.Equals(collectionRid, this.returnRid)) + { + return Task.FromResult(ContainerProperties.CreateWithResourceId(this.returnRid)); + } + else + { + throw new InvalidOperationException($"Unexpected collection rid: {collectionRid}"); + } + } + + protected override Task GetByNameAsync( + string apiVersion, + string resourceAddress, + ITrace trace, + IClientSideRequestStatistics clientSideRequestStatistics, + CancellationToken cancellationToken) + { + this.NameLookupCount++; + return Task.FromResult(ContainerProperties.CreateWithResourceId(this.returnRid)); + } + } + } +}