From fa0b999adb808fe2f9e058e5c11180d71c7c5720 Mon Sep 17 00:00:00 2001 From: Todd Baert Date: Thu, 23 Apr 2026 07:35:14 -0400 Subject: [PATCH 1/3] fix: provider state race Signed-off-by: Todd Baert --- specification.json | 57 +++++++++++++-- specification/README.md | 1 + specification/appendix-e-migrations.md | 77 ++++++++++++++++++++ specification/sections/01-flag-evaluation.md | 22 +++--- specification/sections/02-providers.md | 57 +++++++++++++++ specification/sections/05-events.md | 49 ++++++------- 6 files changed, 222 insertions(+), 41 deletions(-) create mode 100644 specification/appendix-e-migrations.md diff --git a/specification.json b/specification.json index 35ce7efc2..36ec98781 100644 --- a/specification.json +++ b/specification.json @@ -310,7 +310,7 @@ { "id": "Conditional Requirement 1.7.2.1", "machine_id": "conditional_requirement_1_7_2_1", - "content": "In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`.", + "content": "In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`.", "RFC 2119 keyword": null, "children": [] } @@ -319,21 +319,21 @@ { "id": "Requirement 1.7.3", "machine_id": "requirement_1_7_3", - "content": "The client's `provider status` accessor MUST indicate `READY` if the `initialize` function of the associated provider terminates normally.", + "content": "The `provider status` MUST indicate `READY` if the `initialize` function of the associated provider terminates normally.", "RFC 2119 keyword": "MUST", "children": [] }, { "id": "Requirement 1.7.4", "machine_id": "requirement_1_7_4", - "content": "The client's `provider status` accessor MUST indicate `ERROR` if the `initialize` function of the associated provider terminates abnormally.", + "content": "The `provider status` MUST indicate `ERROR` if the `initialize` function of the associated provider terminates abnormally.", "RFC 
2119 keyword": "MUST", "children": [] }, { "id": "Requirement 1.7.5", "machine_id": "requirement_1_7_5", - "content": "The client's `provider status` accessor MUST indicate `FATAL` if the `initialize` function of the associated provider terminates abnormally and indicates `error code` `PROVIDER_FATAL`.", + "content": "The `provider status` MUST indicate `FATAL` if the `initialize` function of the associated provider terminates abnormally and indicates `error code` `PROVIDER_FATAL`.", "RFC 2119 keyword": "MUST", "children": [] }, @@ -361,7 +361,7 @@ { "id": "Requirement 1.7.9", "machine_id": "requirement_1_7_9", - "content": "The client's `provider status` accessor MUST indicate `NOT_READY` once the `shutdown` function of the associated provider terminates.", + "content": "The `provider status` MUST indicate `NOT_READY` once the `shutdown` function of the associated provider terminates.", "RFC 2119 keyword": "MUST", "children": [] }, @@ -564,6 +564,49 @@ "RFC 2119 keyword": "MAY", "children": [] }, + { + "id": "Requirement 2.8.1", + "machine_id": "requirement_2_8_1", + "content": "The provider MUST define a `status` accessor which indicates the provider's current readiness, with possible values `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`.", + "RFC 2119 keyword": "MUST", + "children": [] + }, + { + "id": "Condition 2.8.2", + "machine_id": "condition_2_8_2", + "content": "The implementation uses the static-context paradigm.", + "RFC 2119 keyword": null, + "children": [ + { + "id": "Conditional Requirement 2.8.2.1", + "machine_id": "conditional_requirement_2_8_2_1", + "content": "In addition to `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`, the provider's `status` accessor MUST support possible value `RECONCILING`.", + "RFC 2119 keyword": "MUST", + "children": [] + } + ] + }, + { + "id": "Requirement 2.8.3", + "machine_id": "requirement_2_8_3", + "content": "The provider's `status` MUST be `NOT_READY` before `initialize` is called and after `shutdown` 
terminates.", + "RFC 2119 keyword": "MUST", + "children": [] + }, + { + "id": "Requirement 2.8.4", + "machine_id": "requirement_2_8_4", + "content": "The provider's `status` accessor MUST be safe for concurrent access.", + "RFC 2119 keyword": "MUST", + "children": [] + }, + { + "id": "Requirement 2.8.5", + "machine_id": "requirement_2_8_5", + "content": "Status changes and any associated event emissions MUST be atomic from the perspective of external observers.", + "RFC 2119 keyword": "MUST", + "children": [] + }, { "id": "Requirement 3.1.1", "machine_id": "requirement_3_1_1", @@ -984,7 +1027,7 @@ { "id": "Requirement 5.1.1", "machine_id": "requirement_5_1_1", - "content": "The `provider` MAY define a mechanism for signaling the occurrence of one of a set of events, including `PROVIDER_READY`, `PROVIDER_ERROR`, `PROVIDER_CONFIGURATION_CHANGED` and `PROVIDER_STALE`, with a `provider event details` payload.", + "content": "The `provider` MAY define a mechanism for signaling the occurrence of one of a set of events, including `PROVIDER_READY`, `PROVIDER_ERROR`, `PROVIDER_CONFIGURATION_CHANGED`, `PROVIDER_STALE`, `PROVIDER_RECONCILING`, and `PROVIDER_CONTEXT_CHANGED`, with a `provider event details` payload.", "RFC 2119 keyword": "MAY", "children": [] }, @@ -1118,7 +1161,7 @@ { "id": "Requirement 5.3.5", "machine_id": "requirement_5_3_5", - "content": "If the provider emits an event, the value of the client's `provider status` MUST be updated to the status associated with that event **before** the SDK invokes any event handlers for that event, so that handlers observe a consistent status.", + "content": "When a provider emits an event, the value of the `provider status` MUST reflect the status associated with that event **before** any event handlers for that event are invoked, so that handlers observe a consistent status.", "RFC 2119 keyword": "MUST", "children": [] }, diff --git a/specification/README.md b/specification/README.md index c39658d57..6772899d1 100644 --- 
a/specification/README.md +++ b/specification/README.md @@ -21,6 +21,7 @@ sidebar_position: 0 - [Appendix B: Gherkin Suites](./appendix-b-gherkin-suites.md) - [Appendix C: OFREP](./appendix-c/index.md) - [Appendix D: Observability](./appendix-d-observability.md) +- [Appendix E: Migrations](./appendix-e-migrations.md) ## Conformance diff --git a/specification/appendix-e-migrations.md b/specification/appendix-e-migrations.md new file mode 100644 index 000000000..8aed01fd4 --- /dev/null +++ b/specification/appendix-e-migrations.md @@ -0,0 +1,77 @@ +--- +id: appendix-e +title: "Appendix E: Migrations" +description: SDK migration guidance for breaking spec changes +sidebar_position: 6 +--- + +# Appendix E: Migrations + +This appendix provides non-normative guidance for SDK authors on migrating to new or changed specification requirements without breaking existing providers or application consumers. + +## Provider Status Ownership + +### Background + +Prior to this change, provider status (e.g. `NOT_READY`, `READY`, `ERROR`) was managed by the SDK on behalf of the provider. The SDK would set status and emit events after lifecycle methods (`initialize`, `shutdown`, `on context change`) returned. This created a race condition in multi-threaded SDKs: the provider could change its own state (e.g. emit an error event from a background thread) in the window between the lifecycle method returning and the SDK writing its post-lifecycle status and emitting the corresponding event. The result was incorrect event ordering and inconsistent status. + +The spec now requires providers to own their status and emit events atomically with status transitions (see [provider status](./sections/02-providers.md#28-provider-status)). + +### Migration: the `StateManagingProvider` interface + +To avoid breaking existing providers, SDKs should introduce an opt-in interface (or equivalent mechanism) that new providers implement to signal they manage their own status. 
Existing providers that do not implement this interface continue to have their status managed by the SDK, preserving backwards compatibility. This legacy behavior is deprecated and should be removed in the next major version. + +The interface should expose: + +- A `status` accessor that returns the provider's current status +- A discriminant or marker (e.g. an additional interface, a boolean property, or a type-level tag) that allows the SDK to detect at registration time whether the provider manages its own state + +### SDK wrapper behavior + +SDKs typically wrap registered providers in an internal adapter (e.g. a "provider wrapper" or "state manager") that mediates lifecycle calls and event forwarding. The wrapper should branch based on whether the registered provider implements the state-managing interface. + +```mermaid +flowchart TD + A[Provider registered with SDK] --> B{Implements state-managing interface?} + + B -- Yes --> C[SDK wrapper delegates status to provider] + C --> C1[initialize / shutdown / onContextChange: SDK skips state writes AND event emissions] + C --> C2[Provider events: SDK skips state writes] + C --> C3[status: reads from provider directly] + + B -- No --> D[SDK wrapper manages state internally - legacy, deprecated] + D --> D1[initialize / shutdown / onContextChange: SDK sets state AND emits events after return] + D --> D2[Provider events: SDK updates state on emit] + D --> D3[status: reads from SDK wrapper] + + C1 --> E[Provider-emitted events still propagate to registered handlers] + C2 --> E + C3 --> E + D1 --> E + D2 --> E + D3 --> E +``` + +### What the SDK skips for state-managing providers + +For providers that implement the state-managing interface, the SDK must not perform any of the following actions that it would normally perform for legacy providers: + +- Setting status to `READY` after `initialize()` succeeds +- Setting status to `ERROR` or `FATAL` after `initialize()` fails +- Setting status to `NOT_READY` after `shutdown()` 
completes +- Emitting `PROVIDER_READY` or `PROVIDER_ERROR` events after `initialize()` +- Updating status when the provider emits events at runtime (the provider already set its own status atomically with the event) +- (Static-context paradigm only) Setting `RECONCILING` status, emitting `PROVIDER_RECONCILING`, setting `READY`/`ERROR` status, or emitting `PROVIDER_CONTEXT_CHANGED`/`PROVIDER_ERROR` during `on context change` handling + +### What the SDK still does for all providers + +Regardless of whether the provider manages its own state, the SDK continues to: + +- Call `initialize()`, `shutdown()`, and `on context change` lifecycle methods on the provider +- Forward provider-emitted events to registered domain and API-level event handlers +- Run late-attached handlers immediately if the provider is already in the associated state +- Enforce short-circuit behavior for `NOT_READY` and `FATAL` statuses during flag evaluation + +### Deprecation + +The legacy path (SDK-managed status) should be deprecated in the release that introduces the state-managing interface, with removal targeted for the next major version. SDK authors should update any first-party providers and provider base classes to implement the new interface. diff --git a/specification/sections/01-flag-evaluation.md b/specification/sections/01-flag-evaluation.md index 6f272c480..d7214d5ca 100644 --- a/specification/sections/01-flag-evaluation.md +++ b/specification/sections/01-flag-evaluation.md @@ -421,8 +421,8 @@ This is especially useful for testing purposes to restore the API to a known sta ### 1.7. Provider Lifecycle Management -The implementation maintains an internal representation of the state of configured providers, tracking the lifecycle of each provider. -This state of the provider is exposed on associated `clients`. +Providers own their current status (see [provider status](./02-providers.md#28-provider-status)). 
+The `client`'s `provider status` accessor delegates to the associated provider's `status` accessor, and SDKs surface provider-emitted events to registered handlers. The diagram below illustrates the possible states and transitions of the `state` field for a provider during the provider lifecycle. @@ -463,9 +463,9 @@ stateDiagram-v2 > The `client` **MUST** define a `provider status` accessor which indicates the readiness of the associated provider, with possible values `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`. -The SDK at all times maintains an up-to-date state corresponding to the success/failure of the last lifecycle method (`initialize`, `shutdown`, `on context change`) or emitted event. +The client's `provider status` accessor delegates to the associated provider's `status` accessor, which the provider keeps in sync with the success/failure of the last lifecycle method (`initialize`, `shutdown`, `on context change`) or emitted event. -see [provider status](../types.md#provider-status) +see [provider status](../types.md#provider-status), [provider status requirements](./02-providers.md#28-provider-status) #### Condition 1.7.2 @@ -477,27 +477,30 @@ see: [static-context paradigm](../glossary.md#static-context-paradigm) ##### Conditional Requirement 1.7.2.1 -> In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`. +> In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`. In the static context paradigm, the implementation must define a `provider status` indicating that a provider is reconciling its internal state due to a context change. #### Requirement 1.7.3 -> The client's `provider status` accessor **MUST** indicate `READY` if the `initialize` function of the associated provider terminates normally. 
+> The `provider status` **MUST** indicate `READY` if the `initialize` function of the associated provider terminates normally. Once the provider has initialized, the `provider status` should indicate the provider is ready to be used to evaluate flags. +The provider is responsible for its own status transition; the client's `provider status` accessor reflects it. #### Requirement 1.7.4 -> The client's `provider status` accessor **MUST** indicate `ERROR` if the `initialize` function of the associated provider terminates abnormally. +> The `provider status` **MUST** indicate `ERROR` if the `initialize` function of the associated provider terminates abnormally. If the provider has failed to initialize, the `provider status` should indicate the provider is in an error state. +The provider is responsible for its own status transition; the client's `provider status` accessor reflects it. #### Requirement 1.7.5 -> The client's `provider status` accessor **MUST** indicate `FATAL` if the `initialize` function of the associated provider terminates abnormally and indicates `error code` `PROVIDER_FATAL`. +> The `provider status` **MUST** indicate `FATAL` if the `initialize` function of the associated provider terminates abnormally and indicates `error code` `PROVIDER_FATAL`. If the provider has failed to initialize, the `provider status` should indicate the provider is in an error state. +The provider is responsible for its own status transition; the client's `provider status` accessor reflects it. #### Requirement 1.7.6 @@ -527,9 +530,10 @@ see: [error codes](../types.md#error-code) #### Requirement 1.7.9 -> The client's `provider status` accessor **MUST** indicate `NOT_READY` once the `shutdown` function of the associated provider terminates. +> The `provider status` **MUST** indicate `NOT_READY` once the `shutdown` function of the associated provider terminates. 
Regardless of the success of the provider's `shutdown` function, the `provider status` should convey the provider is no longer ready to use once the shutdown function terminates. +The provider is responsible for its own status transition; the client's `provider status` accessor reflects it. ### 1.8. Isolated API Instances diff --git a/specification/sections/02-providers.md b/specification/sections/02-providers.md index baa7d87e3..3872ad114 100644 --- a/specification/sections/02-providers.md +++ b/specification/sections/02-providers.md @@ -305,3 +305,60 @@ The track function performs side effects required to record the `tracking event` Providers should be careful to complete any communication or flush any relevant uncommitted tracking data before they shut down. See [shutdown](#25-shutdown). + +### 2.8. Provider status + +[![hardening](https://img.shields.io/static/v1?label=Status&message=hardening&color=yellow)](https://github.com/open-feature/spec/tree/main/specification#hardening) + +Providers own their current status and any events associated with status transitions. +This allows providers to atomically update their status and emit the corresponding event, avoiding races between lifecycle methods terminating and events or status updates produced by concurrent work (such as background threads or pollers) maintained by the provider. + +SDKs may provide a base class, wrapper, or other mechanism that maintains status on behalf of provider implementations which do not define it natively, for migration purposes. + +see: [provider lifecycle management](./01-flag-evaluation.md#17-provider-lifecycle-management), [provider events](./05-events.md#51-provider-events) + +#### Requirement 2.8.1 + +> The provider **MUST** define a `status` accessor which indicates the provider's current readiness, with possible values `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`. + +The `status` accessor reflects the provider's readiness to evaluate flags. 
+The client's `provider status` accessor (see [requirement 1.7.1](./01-flag-evaluation.md#requirement-171)) delegates to this accessor. + +see: [provider status](../types.md#provider-status) + +#### Condition 2.8.2 + +> The implementation uses the static-context paradigm. + +see: [static-context paradigm](../glossary.md#static-context-paradigm) + +##### Conditional Requirement 2.8.2.1 + +> In addition to `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`, the provider's `status` accessor **MUST** support possible value `RECONCILING`. + +In the static-context paradigm, the provider must define a `status` value indicating that it is reconciling its internal state due to a context change. + +see: [provider context reconciliation](#26-provider-context-reconciliation) + +#### Requirement 2.8.3 + +> The provider's `status` **MUST** be `NOT_READY` before `initialize` is called and after `shutdown` terminates. + +Providers which do not define an `initialize` function are assumed to be ready at all times, and their `status` may be `READY` from construction. + +see: [initialization](#24-initialization), [shutdown](#25-shutdown) + +#### Requirement 2.8.4 + +> The provider's `status` accessor **MUST** be safe for concurrent access. + +In languages supporting multi-threaded execution, the provider must ensure that concurrent reads of the `status` accessor do not observe torn or inconsistent values. + +#### Requirement 2.8.5 + +> Status changes and any associated event emissions **MUST** be atomic from the perspective of external observers. + +When a provider transitions between statuses and emits an event associated with that transition, external observers (such as SDK event handlers) must observe a consistent view: the updated `status` value and the emitted event are visible together. 
+This prevents ordering anomalies where, for example, a `PROVIDER_READY` handler runs while `status` still indicates `NOT_READY` or `ERROR`, or where the provider transitions out of a status before the associated event is dispatched. + +see: [provider events](./05-events.md#51-provider-events) diff --git a/specification/sections/05-events.md b/specification/sections/05-events.md index 17af33d41..f29008a8a 100644 --- a/specification/sections/05-events.md +++ b/specification/sections/05-events.md @@ -31,17 +31,16 @@ see: [domain](../glossary.md#domain) #### Requirement 5.1.1 -> The `provider` **MAY** define a mechanism for signaling the occurrence of one of a set of events, including `PROVIDER_READY`, `PROVIDER_ERROR`, `PROVIDER_CONFIGURATION_CHANGED` and `PROVIDER_STALE`, with a `provider event details` payload. - -Providers cannot emit `PROVIDER_CONTEXT_CHANGED` or `PROVIDER_RECONCILING` events. -These are emitted only by the SDK during context reconciliation. +> The `provider` **MAY** define a mechanism for signaling the occurrence of one of a set of events, including `PROVIDER_READY`, `PROVIDER_ERROR`, `PROVIDER_CONFIGURATION_CHANGED`, `PROVIDER_STALE`, `PROVIDER_RECONCILING`, and `PROVIDER_CONTEXT_CHANGED`, with a `provider event details` payload. +Providers own their status transitions and emit events atomically with those transitions (see [provider status](./02-providers.md#28-provider-status)). If available, native event-emitter or observable/observer language constructs can be used. When a provider is unable to evaluate flags (perhaps due to loss of connection with a remote service) the provider can signal this by emitting a `PROVIDER_ERROR` event. When it recovers, it can emit a `PROVIDER_READY` event. If the error state is irrecoverable, the `PROVIDER_FATAL` error code can be used. 
If a provider caches rules-sets or previously evaluated flags, and such states cannot be considered up-to-date, the provider can signal this by emitting a `PROVIDER_STALE` event. +Providers in the [static-context paradigm](../glossary.md#static-context-paradigm) additionally emit `PROVIDER_RECONCILING` when starting to reconcile state after a context change, and `PROVIDER_CONTEXT_CHANGED` when that reconciliation terminates normally. see: [provider event types](../types.md#provider-events), [`event details`](../types.md#provider-event-details), [events handlers and context reconciliation](#event-handlers-and-context-reconciliation) @@ -172,10 +171,10 @@ See [provider initialization](./02-providers.md#24-initialization), [setting a p ### Event handlers and context reconciliation Providers built to conform to the static context paradigm feature two additional events: `PROVIDER_RECONCILING` and `PROVIDER_CONTEXT_CHANGED`. -When the provider is reconciling its internal state (the `on context changed` function is running and not yet terminated), the SDK transitions the provider into state `RECONCILING` and then emits `PROVIDER_RECONCILING`. +When the provider is reconciling its internal state (the `on context changed` function is running and not yet terminated), the provider transitions into state `RECONCILING` and emits `PROVIDER_RECONCILING`. This can be particularly useful for displaying loading indicators while the [evaluation context](./03-evaluation-context.md) is being reconciled. -If the `on context changed` function terminates normally, the SDK transitions the provider into the `READY` state and then emits `PROVIDER_CONTEXT_CHANGED`; otherwise it transitions the provider into the `ERROR` state and then emits `PROVIDER_ERROR`. +If the `on context changed` function terminates normally, the provider transitions into the `READY` state and emits `PROVIDER_CONTEXT_CHANGED`; otherwise it transitions into the `ERROR` state and emits `PROVIDER_ERROR`. 
The `PROVIDER_CONTEXT_CHANGED` is used to signal that the associated context has been changed, and flags should be re-evaluated. This can be particularly useful for triggering UI repaints in multiple components when one component updates the [evaluation context](./03-evaluation-context.md). @@ -205,8 +204,8 @@ see: [static-context paradigm](../glossary.md#static-context-paradigm) > While the provider's `on context changed` function is executing, associated `RECONCILING` handlers **MUST** run. -The implementation must run any `RECONCILING` handlers associated with the provider while the provider is reconciling its state. -In languages with asynchronous semantics, the emission of this event can be skipped if the `on context changed` function of the provider in question executes synchronously for a given provider, no other operations can take place while it runs. +`RECONCILING` handlers associated with the provider must run while the provider is reconciling its state. +In languages with asynchronous semantics, the emission of this event can be skipped if the `on context changed` function of the provider in question executes synchronously, since no other operations can take place for that provider while it runs. see: [provider event types](../types.md#provider-events), [provider events](#51-provider-events), [provider context reconciliation](02-providers.md#26-provider-context-reconciliation) @@ -214,36 +213,36 @@ see: [provider event types](../types.md#provider-events), [provider events](#51- > If the provider's `on context changed` function terminates normally, and no other invocations have yet to terminate, associated `PROVIDER_CONTEXT_CHANGED` handlers **MUST** run. -The implementation must run any `PROVIDER_CONTEXT_CHANGED` handlers associated with the provider after the provider has reconciled its state and returned from the `on context changed` function.
-The `PROVIDER_CONTEXT_CHANGED` is not emitted from the provider itself; the SDK implementation must run the `PROVIDER_CONTEXT_CHANGED` handlers if the `on context changed` function terminates normally. -It's possible that the `on context changed` function is invoked simultaneously or in quick succession; in this case the SDK will only run the `PROVIDER_CONTEXT_CHANGED` handlers after all reentrant invocations have terminated, and the last to terminate was successful (terminated normally). +`PROVIDER_CONTEXT_CHANGED` handlers associated with the provider must run after the provider has reconciled its state and returned from the `on context changed` function. +It's possible that the `on context changed` function is invoked simultaneously or in quick succession; in this case `PROVIDER_CONTEXT_CHANGED` handlers only run after all reentrant invocations have terminated, and the last to terminate was successful (terminated normally). + see: [provider event types](../types.md#provider-events), [provider events](#51-provider-events), [provider context reconciliation](02-providers.md#26-provider-context-reconciliation) ##### Conditional Requirement 5.3.4.3 > If the provider's `on context changed` function terminates abnormally, and no other invocations have yet to terminate, associated `PROVIDER_ERROR` handlers **MUST** run. -The `PROVIDER_ERROR` is not emitted from the provider itself; the SDK implementation must run the `PROVIDER_ERROR` handlers if the `on context changed` throws or otherwise signals an error. -It's possible that the `on context changed` function is invoked simultaneously or in quick succession; in this case the SDK will only run the `PROVIDER_ERROR` handlers after all reentrant invocations have terminated, and the last to terminate was unsuccessful (terminated abnormally). +`PROVIDER_ERROR` handlers associated with the provider must run if the `on context changed` function throws or otherwise signals an error. 
+It's possible that the `on context changed` function is invoked simultaneously or in quick succession; in this case `PROVIDER_ERROR` handlers only run after all reentrant invocations have terminated, and the last to terminate was unsuccessful (terminated abnormally). see: [provider event types](../types.md#provider-events), [provider events](#51-provider-events), [provider context reconciliation](02-providers.md#26-provider-context-reconciliation) #### Requirement 5.3.5 -> If the provider emits an event, the value of the client's `provider status` **MUST** be updated to the status associated with that event **before** the SDK invokes any event handlers for that event, so that handlers observe a consistent status. +> When a provider emits an event, the value of the `provider status` **MUST** reflect the status associated with that event **before** any event handlers for that event are invoked, so that handlers observe a consistent status. -Some providers may emit events spontaneously, based on changes in their internal state (connections, caches, etc). -The SDK must update its internal representation of the provider's state accordingly: +The provider owns its own status and emits events atomically with status transitions (see [provider status](./02-providers.md#28-provider-status)). 
+The table below summarizes the association between events and provider status: -| Event | Associated Status | -| -------------------------------- | ------------------------------------------------------- | -| `PROVIDER_READY` | `READY` | -| `PROVIDER_STALE` | `STALE` | -| `PROVIDER_ERROR` | `ERROR`/`FATAL`* | -| `PROVIDER_CONFIGURATION_CHANGED` | N/A (provider remains in its current state) | -| `PROVIDER_CONTEXT_CHANGED` | N/A (only emitted by SDK during context reconciliation) | -| `PROVIDER_RECONCILING` | N/A (only emitted by SDK during context reconciliation) | +| Event | Associated Status | +| -------------------------------- | ------------------------------------------- | +| `PROVIDER_READY` | `READY` | +| `PROVIDER_STALE` | `STALE` | +| `PROVIDER_ERROR` | `ERROR`/`FATAL`* | +| `PROVIDER_CONFIGURATION_CHANGED` | N/A (provider remains in its current state) | +| `PROVIDER_CONTEXT_CHANGED` | `READY` | +| `PROVIDER_RECONCILING` | `RECONCILING` | \* If the `error code` associated with the error indicates `PROVIDER_FATAL`, the state is set to `FATAL` -see: [provider lifecycle management](01-flag-evaluation.md#17-provider-lifecycle-management), [provider status](../types.md#provider-status) [error codes](../types.md#error-code) +see: [provider lifecycle management](01-flag-evaluation.md#17-provider-lifecycle-management), [provider status](../types.md#provider-status), [provider status requirements](./02-providers.md#28-provider-status), [error codes](../types.md#error-code) From fe60ffccd06ab707820a9c0f6c511624384369a5 Mon Sep 17 00:00:00 2001 From: Todd Baert Date: Thu, 23 Apr 2026 07:50:52 -0400 Subject: [PATCH 2/3] fixup: break up migration Signed-off-by: Todd Baert --- specification/appendix-e-migrations.md | 53 +++++++++++++++++++------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/specification/appendix-e-migrations.md b/specification/appendix-e-migrations.md index 8aed01fd4..4cc50fb51 100644 --- a/specification/appendix-e-migrations.md +++ 
b/specification/appendix-e-migrations.md @@ -1,34 +1,60 @@ --- id: appendix-e title: "Appendix E: Migrations" -description: SDK migration guidance for breaking spec changes +description: Migration guidance for breaking spec changes sidebar_position: 6 --- # Appendix E: Migrations -This appendix provides non-normative guidance for SDK authors on migrating to new or changed specification requirements without breaking existing providers or application consumers. +This appendix provides non-normative guidance for provider authors and SDK authors on migrating to new or changed specification requirements. ## Provider Status Ownership ### Background -Prior to this change, provider status (e.g. `NOT_READY`, `READY`, `ERROR`) was managed by the SDK on behalf of the provider. The SDK would set status and emit events after lifecycle methods (`initialize`, `shutdown`, `on context change`) returned. This created a race condition in multi-threaded SDKs: the provider could change its own state (e.g. emit an error event from a background thread) in the window between the lifecycle method returning and the SDK writing its post-lifecycle status and emitting the corresponding event. The result was incorrect event ordering and inconsistent status. +Prior to `v0.9.0`, provider status (e.g. `NOT_READY`, `READY`, `ERROR`) was managed by the SDK on behalf of the provider. +The SDK would set status and emit events after lifecycle methods (`initialize`, `shutdown`, `on context change`) returned. +This created a race condition in multi-threaded SDKs: the provider could change its own state (e.g. emit an error event from a background thread) in the window between the lifecycle method returning and the SDK writing its post-lifecycle status and emitting the corresponding event. +The result was incorrect event ordering and inconsistent status. 
The spec now requires providers to own their status and emit events atomically with status transitions (see [provider status](./sections/02-providers.md#28-provider-status)). -### Migration: the `StateManagingProvider` interface +### For provider authors -To avoid breaking existing providers, SDKs should introduce an opt-in interface (or equivalent mechanism) that new providers implement to signal they manage their own status. Existing providers that do not implement this interface continue to have their status managed by the SDK, preserving backwards compatibility. This legacy behavior is deprecated and should be removed in the next major version. +Providers are now responsible for maintaining their own `status` and emitting events atomically with status transitions. -The interface should expose: +#### What to implement + +- A `status` accessor returning the provider's current readiness: `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL` (plus `RECONCILING` in the static-context paradigm) +- `status` must be `NOT_READY` before `initialize` is called and after `shutdown` terminates +- `status` must be safe for concurrent access +- Status transitions and associated event emissions must be atomic from the perspective of external observers; set the status before emitting the corresponding event + +#### The `StateManagingProvider` interface + +To signal to the SDK that your provider manages its own status, implement an opt-in interface (or equivalent mechanism) defined by the SDK. +This interface should expose: - A `status` accessor that returns the provider's current status -- A discriminant or marker (e.g. an additional interface, a boolean property, or a type-level tag) that allows the SDK to detect at registration time whether the provider manages its own state +- A discriminant or marker (e.g. 
an additional interface, a boolean property, or a type-level tag) that allows the SDK to detect at registration time that the provider manages its own state + +Providers that do not implement this interface will continue to have their status managed by the SDK. +This legacy behavior is deprecated and will be removed in the next major version. + +### For SDK authors + +SDKs must detect whether a registered provider manages its own state and branch behavior accordingly. + +#### Detecting state-managing providers + +At registration time, check whether the provider implements the `StateManagingProvider` interface (or equivalent). +Store this as a flag on the internal provider wrapper for use during lifecycle calls and event handling. -### SDK wrapper behavior +#### SDK wrapper behavior -SDKs typically wrap registered providers in an internal adapter (e.g. a "provider wrapper" or "state manager") that mediates lifecycle calls and event forwarding. The wrapper should branch based on whether the registered provider implements the state-managing interface. +SDKs typically wrap registered providers in an internal adapter (e.g. a "provider wrapper" or "state manager") that mediates lifecycle calls and event forwarding. +The wrapper should branch based on whether the registered provider implements the state-managing interface. 
```mermaid flowchart TD @@ -52,7 +78,7 @@ flowchart TD D3 --> E ``` -### What the SDK skips for state-managing providers +#### What the SDK skips for state-managing providers For providers that implement the state-managing interface, the SDK must not perform any of the following actions that it would normally perform for legacy providers: @@ -63,7 +89,7 @@ For providers that implement the state-managing interface, the SDK must not perf - Updating status when the provider emits events at runtime (the provider already set its own status atomically with the event) - (Static-context paradigm only) Setting `RECONCILING` status, emitting `PROVIDER_RECONCILING`, setting `READY`/`ERROR` status, or emitting `PROVIDER_CONTEXT_CHANGED`/`PROVIDER_ERROR` during `on context change` handling -### What the SDK still does for all providers +#### What the SDK still does for all providers Regardless of whether the provider manages its own state, the SDK continues to: @@ -72,6 +98,7 @@ Regardless of whether the provider manages its own state, the SDK continues to: - Run late-attached handlers immediately if the provider is already in the associated state - Enforce short-circuit behavior for `NOT_READY` and `FATAL` statuses during flag evaluation -### Deprecation +#### Deprecation -The legacy path (SDK-managed status) should be deprecated in the release that introduces the state-managing interface, with removal targeted for the next major version. SDK authors should update any first-party providers and provider base classes to implement the new interface. +The legacy path (SDK-managed status) should be deprecated in the release that introduces the state-managing interface, with removal targeted for the next major version. +SDK authors should update any first-party providers and provider base classes to implement the new interface. 
From 8f1228f391813fbeea9fc2e1fb14862c74042a43 Mon Sep 17 00:00:00 2001 From: Todd Baert Date: Thu, 23 Apr 2026 08:25:38 -0400 Subject: [PATCH 3/3] fixup: keywords and parser Signed-off-by: Todd Baert --- specification.json | 4 ++-- specification/sections/01-flag-evaluation.md | 4 ++-- tools/specification_parser/lint_json_output.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/specification.json b/specification.json index 36ec98781..b355ca25e 100644 --- a/specification.json +++ b/specification.json @@ -310,8 +310,8 @@ { "id": "Conditional Requirement 1.7.2.1", "machine_id": "conditional_requirement_1_7_2_1", - "content": "In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`.", - "RFC 2119 keyword": null, + "content": "In addition to `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`, the `provider status` accessor MUST support possible value `RECONCILING`.", + "RFC 2119 keyword": "MUST", "children": [] } ] diff --git a/specification/sections/01-flag-evaluation.md b/specification/sections/01-flag-evaluation.md index d7214d5ca..0c6145b5e 100644 --- a/specification/sections/01-flag-evaluation.md +++ b/specification/sections/01-flag-evaluation.md @@ -477,9 +477,9 @@ see: [static-context paradigm](../glossary.md#static-context-paradigm) ##### Conditional Requirement 1.7.2.1 -> In addition to `NOT_READY`, `READY`, `STALE`, or `ERROR`, the `provider status` accessor must support possible value `RECONCILING`. +> In addition to `NOT_READY`, `READY`, `STALE`, `ERROR`, or `FATAL`, the `provider status` accessor **MUST** support possible value `RECONCILING`. -In the static context paradigm, the implementation must define a `provider status` indicating that a provider is reconciling its internal state due to a context change. 
+In the static-context paradigm, the implementation **MUST** define a `provider status` indicating that a provider is reconciling its internal state due to a context change. #### Requirement 1.7.3 diff --git a/tools/specification_parser/lint_json_output.py b/tools/specification_parser/lint_json_output.py index 2073ee359..c3099fb1a 100644 --- a/tools/specification_parser/lint_json_output.py +++ b/tools/specification_parser/lint_json_output.py @@ -23,7 +23,7 @@ def main(f): try: for entry in entries: if entry.get('RFC 2119 keyword') is None and \ - 'condition' not in entry['id'].lower(): + 'requirement' in entry['id'].lower(): print(f"{jsonfile.name}: Rule {entry['id']} is missing a RFC 2119 keyword", file=sys.stderr) errors += 1 pass