Skip to content

Commit 4542176

Browse files
mikaelweaveCopilot
andauthored
Return HTTP 409 (Conflict) for Sequential Bundle Race Condition (#5592)
* test: add theory1 transaction conflict repro Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * fix: return conflict for enlisted SQL merge conflicts Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * test: address transaction conflict review feedback Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Scope SQL conflict fail-fast to ambient transactions only The earlier fix gated the fail-fast 409 on MergeOptions.EnlistInTransaction, but regular single-resource upserts also set enlistTransaction: true. That caused concurrent no-ETag upserts (which should retry to a last-write-wins outcome) to surface as ResourceConflictException (409) instead of retrying, breaking GivenASavedResource_WhenConcurrentlyUpsertingWithNoETagHeader_ThenTheExistingResourceIsUpdated. A SQL conflict (50409) only zombies the transaction when the command actually enlisted in an ambient C# transaction scope (the condition used in MergeResourcesWrapperAsync). Gate the fail-fast on EnlistInTransaction && SqlTransactionScope != null, captured before the merge attempt, so only sequential transaction bundles fail fast while regular and batch-bundle upserts keep retrying. Updated the enlisted unit test to set up a real ambient scope via BeginTransaction(), and added a regression test asserting enlist:true without an ambient scope still retries. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Document 50424 fail-fast gap for ambient transactions Add a comment noting that a SurrogateIdCollision (50424) inside an ambient C# transaction zombies the transaction the same way a 50409 conflict does, so the retries there are futile and still surface as a 500. Intentionally left unhandled since 50424 is rare and C# transactions are being deprecated. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Add explanatory comments for transaction conflict 500 vs 409 paths Document why a zombied-at-commit transaction returns 500 (root cause lost) while an inner-request conflict surfaces a precise 409 before commit. Condense data-store comments. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> * Remove trailing whitespace (SA1028) Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9319e6d commit 4542176

4 files changed

Lines changed: 213 additions & 8 deletions

File tree

src/Microsoft.Health.Fhir.Core/Features/Persistence/ResourceConflictException.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
// -------------------------------------------------------------------------------------------------
55

66
using System.Diagnostics;
7+
using EnsureThat;
78
using Microsoft.Health.Fhir.Core.Exceptions;
89
using Microsoft.Health.Fhir.Core.Models;
910

@@ -20,5 +21,15 @@ public ResourceConflictException(WeakETag etag)
2021
OperationOutcomeConstants.IssueType.Conflict,
2122
string.Format(Core.Resources.ResourceVersionConflict, etag?.VersionId)));
2223
}
24+
25+
public ResourceConflictException(string message)
26+
{
27+
EnsureArg.IsNotNullOrWhiteSpace(message, nameof(message));
28+
29+
Issues.Add(new OperationOutcomeIssue(
30+
OperationOutcomeConstants.IssueSeverity.Error,
31+
OperationOutcomeConstants.IssueType.Conflict,
32+
message));
33+
}
2334
}
2435
}

src/Microsoft.Health.Fhir.Shared.Api.UnitTests/Features/Resources/Bundle/BundleHandlerTests.cs

Lines changed: 128 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using System.Collections.Generic;
88
using System.Globalization;
99
using System.Linq;
10+
using System.Net;
1011
using System.Threading;
1112
using Hl7.Fhir.Model;
1213
using Hl7.Fhir.Serialization;
@@ -62,6 +63,7 @@ public class BundleHandlerTests
6263
private readonly BundleConfiguration _bundleConfiguration;
6364
private readonly IMediator _mediator;
6465
private readonly IBundleMetricHandler _bundleMetricHandler;
66+
private readonly ITransactionHandler _transactionHandler;
6567
private DefaultFhirRequestContext _fhirRequestContext;
6668
private readonly IProvideProfilesForValidation _profilesResolver;
6769

@@ -114,7 +116,7 @@ public BundleHandlerTests()
114116
};
115117
httpContextAccessor.HttpContext.Returns(httpContext);
116118

117-
var transactionHandler = Substitute.For<ITransactionHandler>();
119+
_transactionHandler = Substitute.For<ITransactionHandler>();
118120

119121
var resourceIdProvider = new ResourceIdProvider();
120122

@@ -132,7 +134,7 @@ public BundleHandlerTests()
132134
fhirRequestContextAccessor,
133135
fhirJsonSerializer,
134136
fhirJsonParser,
135-
transactionHandler,
137+
_transactionHandler,
136138
bundleHttpContextAccessor,
137139
bundleOrchestrator,
138140
resourceIdProvider,
@@ -467,6 +469,130 @@ public async Task GivenATransaction_WithACrashDuringCSharpTransaction_ReturnAPro
467469
Assert.True(fhirTfe.ResponseStatusCode == System.Net.HttpStatusCode.InternalServerError);
468470
}
469471

472+
// Scenario: the inner requests succeed, but committing the C# transaction throws because the
473+
// ambient SqlTransaction was already zombied (e.g. by a SQL error during an earlier entry that
474+
// was not surfaced before reaching Complete()). At that point the real cause is gone and we only
475+
// see a generic "This SqlTransaction has completed" InvalidOperationException, so the handler maps
476+
// it to a 500. This is the fallback path - contrast with the 409 test below, where the conflict is
477+
// surfaced by an inner request before commit and can be mapped to a precise status.
478+
[Fact]
479+
public async Task GivenATransaction_WhenTransactionIsZombiedAtCommit_ThenHttp500IsReturned()
480+
{
481+
_bundleConfiguration.TransactionDefaultProcessingLogic = BundleProcessingLogic.Sequential;
482+
483+
var bundle = new Hl7.Fhir.Model.Bundle
484+
{
485+
Type = BundleType.Transaction,
486+
Entry = new List<EntryComponent>
487+
{
488+
new EntryComponent
489+
{
490+
Request = new RequestComponent
491+
{
492+
Method = HTTPVerb.POST,
493+
Url = "/Observation",
494+
},
495+
Resource = new Observation(),
496+
},
497+
new EntryComponent
498+
{
499+
Request = new RequestComponent
500+
{
501+
Method = HTTPVerb.POST,
502+
Url = "/Observation",
503+
},
504+
Resource = new Observation(),
505+
},
506+
},
507+
};
508+
509+
ITransactionScope transactionScope = Substitute.For<ITransactionScope>();
510+
_transactionHandler.BeginTransaction().Returns(transactionScope);
511+
transactionScope
512+
.When(scope => scope.Complete())
513+
.Do(_ => throw new InvalidOperationException("This SqlTransaction has completed; it is no longer usable."));
514+
515+
_router.When(r => r.RouteAsync(Arg.Any<RouteContext>()))
516+
.Do(info =>
517+
{
518+
info.Arg<RouteContext>().Handler = context =>
519+
{
520+
context.Response.StatusCode = StatusCodes.Status201Created;
521+
return Task.CompletedTask;
522+
};
523+
});
524+
525+
var bundleRequest = new BundleRequest(bundle.ToResourceElement());
526+
FhirTransactionFailedException fhirTfe = await Assert.ThrowsAsync<FhirTransactionFailedException>(() => _bundleHandler.Handle(bundleRequest, default));
527+
528+
Assert.Equal(HttpStatusCode.InternalServerError, fhirTfe.ResponseStatusCode);
529+
}
530+
531+
// Scenario: an inner request fails fast with a 409 conflict (as the SQL data store now does for a
532+
// concurrency conflict inside an ambient transaction, throwing ResourceConflictException). Because
533+
// the conflict is surfaced as an entry response before the transaction is committed, the handler can
534+
// propagate the precise 409 to the caller instead of the generic 500 from the zombied-at-commit path
535+
// above. This is the user-facing behavior that the SqlServerFhirDataStore fail-fast enables.
536+
[Fact]
537+
public async Task GivenATransaction_WhenInnerRequestReturnsConflictOperationOutcome_ThenHttp409IsReturned()
538+
{
539+
_bundleConfiguration.TransactionDefaultProcessingLogic = BundleProcessingLogic.Sequential;
540+
541+
var bundle = new Hl7.Fhir.Model.Bundle
542+
{
543+
Type = BundleType.Transaction,
544+
Entry = new List<EntryComponent>
545+
{
546+
new EntryComponent
547+
{
548+
Request = new RequestComponent
549+
{
550+
Method = HTTPVerb.POST,
551+
Url = "/Observation",
552+
},
553+
Resource = new Observation(),
554+
},
555+
new EntryComponent
556+
{
557+
Request = new RequestComponent
558+
{
559+
Method = HTTPVerb.POST,
560+
Url = "/Observation",
561+
},
562+
Resource = new Observation(),
563+
},
564+
},
565+
};
566+
567+
_router.When(r => r.RouteAsync(Arg.Any<RouteContext>()))
568+
.Do(info =>
569+
{
570+
info.Arg<RouteContext>().Handler = async context =>
571+
{
572+
var outcome = new OperationOutcome
573+
{
574+
Issue = new List<OperationOutcome.IssueComponent>
575+
{
576+
new OperationOutcome.IssueComponent
577+
{
578+
Severity = OperationOutcome.IssueSeverity.Error,
579+
Code = OperationOutcome.IssueType.Conflict,
580+
Diagnostics = "Resource has been recently updated or added",
581+
},
582+
},
583+
};
584+
585+
context.Response.StatusCode = StatusCodes.Status409Conflict;
586+
await context.Response.WriteAsync(outcome.ToJson());
587+
};
588+
});
589+
590+
var bundleRequest = new BundleRequest(bundle.ToResourceElement());
591+
FhirTransactionFailedException fhirTfe = await Assert.ThrowsAsync<FhirTransactionFailedException>(() => _bundleHandler.Handle(bundleRequest, default));
592+
593+
Assert.Equal(HttpStatusCode.Conflict, fhirTfe.ResponseStatusCode);
594+
}
595+
470596
[Fact]
471597
public async Task GivenATransaction_WithACancellationHappens_ReturnAProperError()
472598
{

src/Microsoft.Health.Fhir.SqlServer.UnitTests/Features/Storage/SqlServerFhirDataStoreUnitTests.cs

Lines changed: 64 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,12 @@ private static bool InvokeChangesAreOnlyInMetadata(ResourceWrapper inputWrapper,
197197
// This method is indirectly tested through integration tests (UpdateTests, FhirPathPatchTests)
198198

199199
[Fact]
200-
public async Task MergeAsync_OnSqlConflict_WithAtomicOptions_ThrowsPreconditionFailedException()
200+
public async Task MergeAsync_OnSqlConflict_WhenEnlistedInAmbientTransaction_ThrowsResourceConflictExceptionWithoutRetry()
201201
{
202202
// Arrange
203203
var sqlRetryService = Substitute.For<ISqlRetryService>();
204204
var sqlException = SqlExceptionFactory.GetSqlException(SqlErrorCodes.Conflict, "SQL Conflict");
205+
using var cts = new CancellationTokenSource();
205206

206207
sqlRetryService.ExecuteReaderAsync(
207208
Arg.Any<SqlCommand>(),
@@ -212,12 +213,67 @@ public async Task MergeAsync_OnSqlConflict_WithAtomicOptions_ThrowsPreconditionF
212213
Arg.Any<bool>())
213214
.Throws(sqlException);
214215

216+
sqlRetryService
217+
.TryLogEvent(Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<DateTime?>(), Arg.Any<CancellationToken>())
218+
.Returns(async callInfo =>
219+
{
220+
// Tripwire: if the retry loop is incorrectly entered, cancellation makes the test fail fast rather than hang.
221+
cts.Cancel();
222+
await Task.CompletedTask;
223+
});
224+
225+
// An ambient C# transaction (as opened by a sequential transaction bundle) is the condition that
226+
// zombies on a SQL conflict, so the data store must fail fast rather than retry within it.
227+
var transactionHandler = new SqlTransactionHandler();
228+
using var transactionScope = transactionHandler.BeginTransaction();
229+
230+
var dataStore = CreateSqlServerFhirDataStore(sqlRetryService, transactionHandler);
231+
var resources = CreateResourceWrapperOperations();
232+
233+
// Act & Assert
234+
await Assert.ThrowsAsync<ResourceConflictException>(
235+
() => dataStore.MergeAsync(resources, new MergeOptions(enlistTransaction: true, ensureAtomicOperations: true), cts.Token));
236+
237+
await sqlRetryService.DidNotReceive()
238+
.TryLogEvent("MergeAsync", "Warn", Arg.Any<string>(), Arg.Any<DateTime?>(), Arg.Any<CancellationToken>());
239+
}
240+
241+
[Fact]
242+
public async Task MergeAsync_OnSqlConflict_WhenEnlistTransactionWithoutAmbientScope_RetriesBeforeThrowing()
243+
{
244+
// Arrange - a regular (non-bundle) upsert sets enlistTransaction: true but runs without an ambient
245+
// C# transaction scope. There is nothing to zombie, so a SQL conflict must still be retried
246+
// (last-write-wins), not converted into a fail-fast 409.
247+
var sqlRetryService = Substitute.For<ISqlRetryService>();
248+
var sqlException = SqlExceptionFactory.GetSqlException(SqlErrorCodes.Conflict, "SQL Conflict");
249+
using var cts = new CancellationTokenSource();
250+
251+
sqlRetryService.ExecuteReaderAsync(
252+
Arg.Any<SqlCommand>(),
253+
Arg.Any<Func<SqlDataReader, ResourceWrapper>>(),
254+
Arg.Any<ILogger>(),
255+
Arg.Any<string>(),
256+
Arg.Any<CancellationToken>(),
257+
Arg.Any<bool>())
258+
.Throws(sqlException);
259+
260+
sqlRetryService
261+
.TryLogEvent(Arg.Any<string>(), Arg.Any<string>(), Arg.Any<string>(), Arg.Any<DateTime?>(), Arg.Any<CancellationToken>())
262+
.Returns(async callInfo =>
263+
{
264+
cts.Cancel();
265+
await Task.CompletedTask;
266+
});
267+
215268
var dataStore = CreateSqlServerFhirDataStore(sqlRetryService);
216269
var resources = CreateResourceWrapperOperations();
217270

218271
// Act & Assert
219-
await Assert.ThrowsAsync<PreconditionFailedException>(
220-
() => dataStore.MergeAsync(resources, new MergeOptions(enlistTransaction: true, ensureAtomicOperations: true), CancellationToken.None));
272+
await Assert.ThrowsAsync<TaskCanceledException>(
273+
() => dataStore.MergeAsync(resources, new MergeOptions(enlistTransaction: true, ensureAtomicOperations: false), cts.Token));
274+
275+
await sqlRetryService.Received(1)
276+
.TryLogEvent("MergeAsync", "Warn", Arg.Any<string>(), Arg.Any<DateTime?>(), Arg.Any<CancellationToken>());
221277
}
222278

223279
[Fact]
@@ -315,8 +371,10 @@ public void GivenUnknownResourceType_WhenGettingResourceTypeId_ThenResourceNotFo
315371
Assert.Contains("is not a known resource type", exception.Message, StringComparison.Ordinal);
316372
}
317373

318-
private static SqlServerFhirDataStore CreateSqlServerFhirDataStore(ISqlRetryService sqlRetryService)
374+
private static SqlServerFhirDataStore CreateSqlServerFhirDataStore(ISqlRetryService sqlRetryService, SqlTransactionHandler sqlTransactionHandler = null)
319375
{
376+
sqlTransactionHandler ??= new SqlTransactionHandler();
377+
320378
ModelInfoProvider.SetProvider(MockModelInfoProviderBuilder.Create(FhirSpecification.R4).AddKnownTypes(KnownResourceTypes.Group).Build());
321379

322380
var schemaInfo = new SchemaInformation(SchemaVersionConstants.Min, SchemaVersionConstants.Max)
@@ -370,7 +428,7 @@ private static SqlServerFhirDataStore CreateSqlServerFhirDataStore(ISqlRetryServ
370428

371429
var sqlServerDataStoreConfiguration = new SqlServerDataStoreConfiguration() { ConnectionString = sqlConnection.ConnectionString };
372430

373-
var sqlConnectionWrapperFactory = new SqlConnectionWrapperFactory(new SqlTransactionHandler(), sqlConnectionBuilder, sqlRetryLogicBaseProvider, Options.Create(sqlServerDataStoreConfiguration));
431+
var sqlConnectionWrapperFactory = new SqlConnectionWrapperFactory(sqlTransactionHandler, sqlConnectionBuilder, sqlRetryLogicBaseProvider, Options.Create(sqlServerDataStoreConfiguration));
374432

375433
var dataStore = new SqlServerFhirDataStore(
376434
model,
@@ -381,7 +439,7 @@ private static SqlServerFhirDataStore CreateSqlServerFhirDataStore(ISqlRetryServ
381439
NullLogger<BundleOrchestrator>.Instance),
382440
sqlRetryService,
383441
sqlConnectionWrapperFactory,
384-
new SqlTransactionHandler(),
442+
sqlTransactionHandler,
385443
Substitute.For<ICompressedRawResourceConverter>(),
386444
NullLogger<SqlServerFhirDataStore>.Instance,
387445
schemaInfo,

src/Microsoft.Health.Fhir.SqlServer/Features/Storage/SqlServerFhirDataStore.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,9 @@ public async Task<MergeOutcome> MergeAsync(IReadOnlyList<ResourceWrapperOperatio
156156
{
157157
cancellationToken.ThrowIfCancellationRequested();
158158

159+
// Capture whether this attempt is enlisted in an ambient C# transaction (e.g. a sequential transaction bundle).
160+
bool wasEnlistedInAmbientTransaction = mergeOptions.EnlistInTransaction && _sqlTransactionHandler.SqlTransactionScope != null;
161+
159162
try
160163
{
161164
var results = await MergeInternalAsync(
@@ -180,6 +183,13 @@ public async Task<MergeOutcome> MergeAsync(IReadOnlyList<ResourceWrapperOperatio
180183

181184
if (sqlEx.Number == SqlErrorCodes.Conflict)
182185
{
186+
if (wasEnlistedInAmbientTransaction)
187+
{
188+
// The ambient SqlTransaction is now zombied by this conflict; retrying within it is futile. Fail fast with a 409 so the client can retry the whole transaction bundle.
189+
_logger.LogWarning(e, "Conflict: ResourceConcurrentUpdateConflict in ambient transaction; failing fast (SQL error {SqlErrorNumber}).", sqlEx.Number);
190+
throw new ResourceConflictException(Resources.ResourceConcurrentUpdateConflict);
191+
}
192+
183193
if (retries++ >= maxRetries)
184194
{
185195
_logger.LogInformation("PreconditionFailed: ResourceConcurrentUpdateConflict");

0 commit comments

Comments
 (0)