@@ -31,6 +31,21 @@ final private[openfeature] class FeatureFlagsLive(
3131 evaluationTimeout : Option [Duration ] = None
3232) extends FeatureFlags {
3333
34+ // Records when a `setProvider` swap last failed. Used by the async PROVIDER_READY bridge to decide whether an
35+ // incoming Ready event is a real recovery signal or a stale event left over from a previous attach that's racing
36+ // against the explicit Error transition set by the failed swap. See `setProvider` and `readyHandler` below.
37+ //
38+ // Initialised to `0L` (epoch). `currentTimeMillis() - 0L` will always be far beyond `FailedSwapGuardMillis`, so the
39+ // guard never trips before the first failed swap. (Using `Long.MinValue` instead would overflow the subtraction and
40+ // wrap negative, causing the guard to trip incorrectly and block legitimate Error → Ready recoveries.)
41+ private val recentSwapFailureAt = new java.util.concurrent.atomic.AtomicLong (0L )
42+
43+ // How long after a failed swap an async PROVIDER_READY event should be ignored as a likely stale signal. Real
44+ // recovery scenarios (provider was in Error for an extended period and genuinely transitions back to Ready)
45+ // happen on a much longer timescale than this; the race window we're closing is the SDK's emitter executor
46+ // dispatching a queued event that pre-dates our explicit Error.
47+ private val FailedSwapGuardMillis : Long = 500L
48+
3449 // Bridge Java SDK provider events to ZIO event system
3550 private [openfeature] def startEventBridge : ZIO [Scope , Nothing , Unit ] = {
3651 // Read provider name dynamically so events after a provider swap use the new name
@@ -67,7 +82,19 @@ final private[openfeature] class FeatureFlagsLive(
6782 val readyHandler : java.util.function.Consumer [EventDetails ] = details => {
6883 val em = extractEventMetadata(details)
6984 runHandler(runtime, " PROVIDER_READY" )(
70- state.statusRef.set(ProviderStatus .Ready ) *>
85+ // Transition statusRef only from states where PROVIDER_READY is meaningful. The `Error => Ready` arrow is
86+ // valid per the OpenFeature spec (recovery from a recoverable error) but is guarded by
87+ // `FailedSwapGuardMillis`: if the most recent statusRef write was a failed-swap Error within that window,
88+ // a Ready event arriving now is almost certainly a stale signal queued on the SDK's emitter executor before
89+ // the swap, not a genuine recovery. Real recoveries happen on timescales much longer than the guard.
90+ state.statusRef.update {
91+ case ProviderStatus .NotReady => ProviderStatus .Ready
92+ case ProviderStatus .Stale => ProviderStatus .Ready
93+ case ProviderStatus .Error =>
94+ val sinceFailure = java.lang.System .currentTimeMillis() - recentSwapFailureAt.get()
95+ if (sinceFailure >= FailedSwapGuardMillis ) ProviderStatus .Ready else ProviderStatus .Error
96+ case other => other
97+ } *>
7198 state.eventHub.publish(ProviderEvent .Ready (currentMetadata(runtime), em)).unit
7299 )
73100 onReady.foreach(_.countDown())
@@ -816,8 +843,11 @@ final private[openfeature] class FeatureFlagsLive(
816843 case None => ZIO .attemptBlocking(api.setProviderAndWait(newProvider))
817844 }).mapError(e => FeatureFlagError .ProviderInitializationFailed (e))
818845 .tapError(_ =>
819- // Rollback refs and set Error status so the instance is in a diagnosable state
820- providerRef.set(oldProvider) *>
846+ // Rollback refs and set Error status so the instance is in a diagnosable state. Stamp
847+ // `recentSwapFailureAt` BEFORE the statusRef write so the async PROVIDER_READY handler sees a fresh
848+ // failure timestamp and skips overwriting Error within the guard window.
849+ ZIO .succeed(recentSwapFailureAt.set(java.lang.System .currentTimeMillis())) *>
850+ providerRef.set(oldProvider) *>
821851 providerNameRef.set(oldName) *>
822852 state.statusRef.set(ProviderStatus .Error )
823853 )
0 commit comments