Skip to content

Commit 5d3d82f

Browse files
brantburnett authored and jeffrymorris committed
NCBC-4128: Clear dead nodes after select bucket failure
Motivation ---------- When attempting to reuse a globally bootstrapped node and select a bucket so it becomes a bucket-specific node, if the select bucket call fails the node may have all connections dead. Because the select bucket call goes through a special path to all connections in the pool, it can then continue to fail because the dead connection is still there and will fail on the next attempt. Modifications ------------- - Don't attempt to select bucket on dead connections in the connection pool - Throw an error if there are no healthy connections in the pool when select bucket is attempted - On a general exception calling select bucket, discard the node so the next attempt will use a fresh connection pool Results ------- Greater resiliency in cases where the cluster isn't fully accessible during bootstrap and the first call to BucketAsync fails. This prevents the Cluster from being in a bad state until the application is recycled. Change-Id: I2db1e2f6653ef604f6d3e03405e99a1264ae486d Reviewed-on: https://review.couchbase.org/c/couchbase-net-client/+/239143 Tested-by: Build Bot <build@couchbase.com> Reviewed-by: Jeffry Morris <jeffrymorris@gmail.com>
1 parent f8fb0d7 commit 5d3d82f

2 files changed

Lines changed: 21 additions & 1 deletion

File tree

src/Couchbase/Core/ClusterContext.cs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
using System.Collections.Generic;
2020
using System.Linq;
2121
using System.Runtime.ExceptionServices;
22-
using System.Text.Json;
2322
using System.Threading;
2423
using System.Threading.Tasks;
2524

@@ -629,6 +628,19 @@ public async Task<BucketBase> CreateAndBootStrapBucketAsync(string name, HostEnd
629628
config = await _httpClusterMap.GetClusterMapAsync(
630629
name, node.EndPoint, CancellationToken.None).ConfigureAwait(false);
631630
}
631+
catch
632+
{
633+
// The ClusterNode may be in a bad state, such as all connections being unhealthy, so we'll
634+
// never be able to reuse this node for a later SelectBucketAsync call. Clean up so the
635+
// next attempt can succeed with a fresh node.
636+
if (!newNode)
637+
{
638+
RemoveNode(node);
639+
}
640+
641+
node.Dispose();
642+
throw;
643+
}
632644

633645
//Determine the bucket type to create based off the bucket capabilities
634646
var type = config.BucketCapabilities.Contains("cccp") ? BucketType.Couchbase : BucketType.Memcached;

src/Couchbase/Core/IO/Connections/ConnectionPoolBase.cs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
using System.Linq;
55
using System.Threading;
66
using System.Threading.Tasks;
7+
using Couchbase.Core.Exceptions.KeyValue;
78
using Couchbase.Core.IO.Operations;
89
using Couchbase.Utils;
910
using Microsoft.Extensions.Logging;
@@ -119,6 +120,7 @@ public virtual async Task SelectBucketAsync(string name, CancellationToken cance
119120
await using ((await FreezePoolAsync(cancellationToken).ConfigureAwait(false)).ConfigureAwait(false))
120121
{
121122
var tasks = GetConnections()
123+
.Where(p => !p.IsDead)
122124
.Select(connection => Task.Run(async () =>
123125
{
124126
try
@@ -136,6 +138,12 @@ await _connectionInitializer.SelectBucketAsync(connection, name, cancellationTok
136138
}, cancellationToken))
137139
.ToList();
138140

141+
if (tasks.Count == 0)
142+
{
143+
// There are no healthy connections, so we cannot select the bucket
144+
throw new DocumentNotFoundException($"Failed to select bucket {BucketName}.");
145+
}
146+
139147
await Task.WhenAll(tasks).ConfigureAwait(false);
140148

141149
BucketName = name;

0 commit comments

Comments
 (0)