Skip to content

Commit 9206f15

Browse files
authored
Refactor versioning API (#237)
* Simpler representation of version "graph" * Support id for each compatible set * Support for future workflow bundles
1 parent 35e9e70 commit 9206f15

File tree

7 files changed

+150
-73
lines changed

7 files changed

+150
-73
lines changed

buf.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
version: v1
22
breaking:
3+
ignore:
4+
# TODO: Remove after PR 237
5+
- temporal/api/taskqueue/v1
6+
- temporal/api/workflowservice/v1
7+
- temporal/api/history/v1
38
use:
49
- PACKAGE
510
lint:

temporal/api/common/v1/message.proto

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,20 @@ message MeteringMetadata {
121121
// aip.dev/not-precedent: Negative values make no sense to represent. --)
122122
uint32 nonfirst_local_activity_execution_attempts = 13;
123123
}
124+
125+
// Identifies the version(s) of a worker that processed a task
126+
message WorkerVersionStamp {
127+
// An opaque whole-worker identifier
128+
string build_id = 1;
129+
// Set if the worker used a dynamically loadable bundle to process
130+
// the task. The bundle could be a WASM blob, JS bundle, etc.
131+
string bundle_id = 2;
132+
}
133+
134+
// Identifies the version(s) that a worker is compatible with when polling or identifying itself
135+
message WorkerVersionCapabilities {
136+
// An opaque whole-worker identifier
137+
string build_id = 1;
138+
139+
// Later, may include info like "I can process WASM and/or JS bundles"
140+
}

temporal/api/history/v1/message.proto

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,10 @@ message WorkflowTaskCompletedEventAttributes {
193193
string identity = 3;
194194
// Binary ID of the worker who completed this task
195195
string binary_checksum = 4;
196-
// ID of the worker who picked up this workflow task, or missing if worker
197-
// is not using versioning.
198-
temporal.api.taskqueue.v1.VersionId worker_versioning_id = 5;
196+
// Version info of the worker who processed this workflow task, or missing if worker is not
197+
// using versioning. If present, the `build_id` field within is also used as `binary_checksum`,
198+
// which may be omitted in that case (it may also be populated to preserve compatability).
199+
temporal.api.common.v1.WorkerVersionStamp worker_version = 5;
199200
// Data the SDK wishes to record for itself, but server need not interpret, and does not
200201
// directly impact workflow state.
201202
temporal.api.sdk.v1.WorkflowTaskCompletedMetadata sdk_metadata = 6;

temporal/api/taskqueue/v1/message.proto

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import "google/protobuf/wrappers.proto";
3838
import "dependencies/gogoproto/gogo.proto";
3939

4040
import "temporal/api/enums/v1/task_queue.proto";
41+
import "temporal/api/common/v1/message.proto";
4142

4243
// See https://docs.temporal.io/docs/concepts/task-queues/
4344
message TaskQueue {
@@ -71,13 +72,12 @@ message TaskQueuePartitionMetadata {
7172
}
7273

7374
message PollerInfo {
74-
// Unix Nano
7575
google.protobuf.Timestamp last_access_time = 1 [(gogoproto.stdtime) = true];
7676
string identity = 2;
7777
double rate_per_second = 3;
78-
// If a worker has specified an ID for use with the worker versioning feature while polling,
79-
// that id must appear here.
80-
VersionId worker_versioning_id = 4;
78+
// If a worker has opted into the worker versioning feature while polling, its capabilities will
79+
// appear here.
80+
temporal.api.common.v1.WorkerVersionCapabilities worker_version_capabilities = 4;
8181
}
8282

8383
message StickyExecutionAttributes {
@@ -87,22 +87,12 @@ message StickyExecutionAttributes {
8787
google.protobuf.Duration schedule_to_start_timeout = 2 [(gogoproto.stdduration) = true];
8888
}
8989

90-
// Used by the worker versioning APIs, represents a node in the version graph for a particular
91-
// task queue
92-
message VersionIdNode {
93-
VersionId version = 1;
94-
// A pointer to the previous version this version is considered to be compatible with
95-
VersionIdNode previous_compatible = 2;
96-
// A pointer to the last incompatible version (previous major version)
97-
VersionIdNode previous_incompatible = 3;
90+
// Used by the worker versioning APIs, represents an ordering of one or more versions which are
91+
// considered to be compatible with each other. Currently the versions are always worker build ids.
92+
message CompatibleVersionSet {
93+
// A unique identifier for this version set. Users don't need to understand or care about this
94+
// value, but it has value for debugging purposes.
95+
string version_set_id = 1;
96+
// All the compatible versions, ordered from oldest to newest
97+
repeated string build_ids = 2;
9898
}
99-
100-
// Used by the worker versioning APIs, represents a specific version of something
101-
// Currently, that's just a whole-worker id. In the future, if we support
102-
// WASM workflow bundle based versioning, for example, then the inside of this
103-
// message may become a oneof of different version types.
104-
message VersionId {
105-
// An opaque whole-worker identifier
106-
string worker_build_id = 1;
107-
}
108-

temporal/api/workflow/v1/message.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ message WorkflowExecutionInfo {
5757
string task_queue = 13;
5858
int64 state_transition_count = 14;
5959
int64 history_size_bytes = 15;
60+
// If set, the most recent worker version stamp that appeared in a workflow task completion
61+
temporal.api.common.v1.WorkerVersionStamp most_recent_worker_version_stamp = 16;
6062
}
6163

6264
message WorkflowExecutionConfig {

temporal/api/workflowservice/v1/request_response.proto

Lines changed: 102 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -237,12 +237,12 @@ message PollWorkflowTaskQueueRequest {
237237
// Each worker process should provide an ID unique to the specific set of code it is running
238238
// "checksum" in this field name isn't very accurate, it should be though of as an id.
239239
string binary_checksum = 4;
240-
// If set, the worker is opting in to build-id based versioning and wishes to only
241-
// receive tasks that are considered compatible with the version provided.
240+
// If set, the worker is opting in to versioning and wishes to only
241+
// receive tasks that are considered compatible with the version capabilities provided.
242242
// Doing so only makes sense in conjunction with the `UpdateWorkerBuildIdOrdering` API.
243-
// When `worker_versioning_id` has a `worker_build_id`, and `binary_checksum` is not
243+
// When this field has a `worker_build_id`, and `binary_checksum` is not
244244
// set, that value should also be considered as the `binary_checksum`.
245-
temporal.api.taskqueue.v1.VersionId worker_versioning_id = 5;
245+
temporal.api.common.v1.WorkerVersionCapabilities worker_version_capabilities = 5;
246246
}
247247

248248
message PollWorkflowTaskQueueResponse {
@@ -310,11 +310,12 @@ message RespondWorkflowTaskCompletedRequest {
310310
// Responses to the `queries` field in the task being responded to
311311
map<string, temporal.api.query.v1.WorkflowQueryResult> query_results = 8;
312312
string namespace = 9;
313-
// If using versioning, worker should send the same id here that it used to
314-
// poll for the workflow task.
315-
// When `worker_versioning_id` has a `worker_build_id`, and `binary_checksum` is not
316-
// set, that value should also be considered as the `binary_checksum`.
317-
temporal.api.taskqueue.v1.VersionId worker_versioning_id = 10;
313+
// If using versioning, the worker uses this field to indicate what version(s) it used to
314+
// process the task. When this field has a `worker_build_id`, and `binary_checksum` is not set,
315+
// that value should also be considered as the `binary_checksum`. Leaving this field empty when
316+
// replying to a task has had this field previously populated in history in an error, and such
317+
// a completion will be rejected.
318+
temporal.api.common.v1.WorkerVersionStamp worker_version_stamp = 10;
318319
// Protocol messages piggybacking on a WFT as a transport
319320
repeated temporal.api.protocol.v1.Message messages = 11;
320321
// Data the SDK wishes to record for itself, but server need not interpret, and does not
@@ -359,10 +360,10 @@ message PollActivityTaskQueueRequest {
359360
// The identity of the worker/client
360361
string identity = 3;
361362
temporal.api.taskqueue.v1.TaskQueueMetadata task_queue_metadata = 4;
362-
// If set, the worker is opting in to build-id based versioning and wishes to only
363-
// receive tasks that are considered compatible with the version provided.
363+
// If set, the worker is opting in to versioning and wishes to only
364+
// receive tasks that are considered compatible with the capabilities provided.
364365
// Doing so only makes sense in conjunction with the `UpdateWorkerBuildIdOrdering` API.
365-
temporal.api.taskqueue.v1.VersionId worker_versioning_id = 5;
366+
temporal.api.common.v1.WorkerVersionCapabilities worker_version_capabilities = 5;
366367
}
367368

368369
message PollActivityTaskQueueResponse {
@@ -1041,44 +1042,103 @@ message ListSchedulesResponse {
10411042
// aip.dev/not-precedent: UpdateWorkerBuildIdOrderingRequest doesn't follow Google API format --)
10421043
// (-- api-linter: core::0134::request-resource-required=disabled
10431044
// aip.dev/not-precedent: UpdateWorkerBuildIdOrderingRequest RPC doesn't follow Google API format. --)
1044-
message UpdateWorkerBuildIdOrderingRequest {
1045+
message UpdateWorkerBuildIdCompatabilityRequest {
1046+
message AddNewCompatibleVersion {
1047+
// A new id to be added to an existing compatible set.
1048+
string new_build_id = 1;
1049+
// A build id which must already exist in the version sets known by the task queue. The new
1050+
// id will be stored in the set containing this id, marking it as compatible with
1051+
// the versions within.
1052+
string existing_compatible_build_id = 2;
1053+
// When set, establishes the compatible set being targeted as the overall default for the
1054+
// queue. If a different set was the current default, the targeted set will replace it as
1055+
// the new default.
1056+
bool make_set_default = 3;
1057+
}
1058+
10451059
string namespace = 1;
1046-
// Must be set, the task queue to apply changes to. Because all workers on
1047-
// a given task queue must have the same set of workflow & activity
1048-
// implementations, there is no reason to specify a task queue type here.
1060+
// Must be set, the task queue to apply changes to. Because all workers on a given task queue
1061+
// must have the same set of workflow & activity implementations, there is no reason to specify
1062+
// a task queue type here.
10491063
string task_queue = 2;
1050-
// The version id we are targeting.
1051-
temporal.api.taskqueue.v1.VersionId version_id = 3;
1052-
// When set, indicates that the `version_id` in this message is compatible
1053-
// with the one specified in this field. Because compatability should form
1054-
// a DAG, any build id can only be the "next compatible" version for one
1055-
// other ID of a certain type at a time, and any setting which would create a cycle is invalid.
1056-
temporal.api.taskqueue.v1.VersionId previous_compatible = 4;
1057-
// When set, establishes the specified `version_id` as the default of it's type
1058-
// for the queue. Workers matching it will begin processing new workflow executions.
1059-
// The existing default will be marked as a previous incompatible version
1060-
// to this one, assuming it is not also in `is_compatible_with`.
1061-
bool become_default = 5;
1062-
}
1063-
message UpdateWorkerBuildIdOrderingResponse {}
1064+
oneof operation {
1065+
// A new build id. This operation will create a new set which will be the new overall
1066+
// default version for the queue, with this id as its only member. This new set is
1067+
// incompatible with all previous sets/versions.
1068+
//
1069+
// (-- api-linter: core::0140::prepositions=disabled
1070+
// aip.dev/not-precedent: In makes perfect sense here. --)
1071+
string add_new_build_id_in_new_default_set = 3;
1072+
// Adds a new id to an existing compatible set, see sub-message definition for more.
1073+
AddNewCompatibleVersion add_new_compatible_build_id = 4;
1074+
// Promote an existing set to be the current default (if it isn't already) by targeting
1075+
// an existing build id within it. This field's value is the extant build id.
1076+
//
1077+
// (-- api-linter: core::0140::prepositions=disabled
1078+
// aip.dev/not-precedent: Names are hard. --)
1079+
string promote_set_by_build_id = 5;
1080+
// Promote an existing build id within some set to be the current default for that set.
1081+
//
1082+
// (-- api-linter: core::0140::prepositions=disabled
1083+
// aip.dev/not-precedent: Within makes perfect sense here. --)
1084+
string promote_build_id_within_set = 6;
1085+
}
1086+
}
1087+
message UpdateWorkerBuildIdCompatabilityResponse {
1088+
// The id of the compatible set that the updated version was added to, or exists in. Users don't
1089+
// need to understand or care about this value, but it has value for debugging purposes.
1090+
string version_set_id = 1;
1091+
}
10641092

10651093
// (-- api-linter: core::0134::request-resource-required=disabled
10661094
// aip.dev/not-precedent: GetWorkerBuildIdOrderingRequest RPC doesn't follow Google API format. --)
1067-
message GetWorkerBuildIdOrderingRequest {
1095+
message GetWorkerBuildIdCompatabilityRequest {
10681096
string namespace = 1;
10691097
// Must be set, the task queue to interrogate about worker id ordering
10701098
string task_queue = 2;
1071-
// Limits how deep the returned DAG will go. 1 will return only the
1072-
// default build id. A default/0 value will return the entire graph.
1073-
int32 max_depth = 3;
1074-
}
1075-
message GetWorkerBuildIdOrderingResponse {
1076-
// The currently established default version
1077-
temporal.api.taskqueue.v1.VersionIdNode current_default = 1;
1078-
// Other current latest-compatible versions who are not the overall default. These are the
1079-
// versions that will be used when generating new tasks by following the graph from the
1080-
// version of the last task out to a leaf.
1081-
repeated temporal.api.taskqueue.v1.VersionIdNode compatible_leaves = 2;
1099+
// Limits how many compatible sets will be returned. Specify 1 to only return the current
1100+
// default major version set. 0 returns all sets.
1101+
int32 max_sets = 3;
1102+
// If set, the response will include information about worker versions which are ready to be
1103+
// retired.
1104+
bool include_retirement_candidates = 4;
1105+
// If set, the response will include information about which versions have open workflows, and
1106+
// whether or not there are currently polling workers who are compatible with those versions.
1107+
bool include_poller_compatability = 5;
1108+
}
1109+
message GetWorkerBuildIdCompatabilityResponse {
1110+
// Major version sets, in order from oldest to newest. The last element of the list will always
1111+
// be the current default major version. IE: New workflows will target the most recent version
1112+
// in that version set.
1113+
//
1114+
// There may be fewer sets returned than exist, if the request chose to limit this response.
1115+
repeated temporal.api.taskqueue.v1.CompatibleVersionSet major_version_sets = 1;
1116+
1117+
message RetirementCandidate {
1118+
// The worker build id which is ready for retirement
1119+
string build_id = 1;
1120+
// If true, there are no open *or* closed workflows, meaning there is no reason at all
1121+
// to keep the worker alive, not even to service queries on closed workflows. If not true,
1122+
// then there are no open workflows, but some closed ones.
1123+
bool all_workflows_are_archived = 2;
1124+
// Currently polling workers who match the build id ready for retirement
1125+
repeated temporal.api.taskqueue.v1.PollerInfo pollers = 3;
1126+
}
1127+
1128+
// A list of workers who are still live and polling the task queue, but may no longer be needed
1129+
// to make progress on open workflows.
1130+
repeated RetirementCandidate retirement_candidates = 2;
1131+
1132+
message VersionsWithCompatiblePollers {
1133+
// The latest build id which completed a workflow task on some open workflow
1134+
string most_recent_build_id = 1;
1135+
// Currently polling workers who are compatible with `most_recent_build_id`.
1136+
repeated temporal.api.taskqueue.v1.PollerInfo pollers = 2;
1137+
}
1138+
1139+
// A list of versions and pollers who are capable of processing tasks at that version (if any)
1140+
// for which there are currently open workflows.
1141+
repeated VersionsWithCompatiblePollers active_versions_and_pollers = 3;
10821142
}
10831143

10841144
// (-- api-linter: core::0134=disabled

temporal/api/workflowservice/v1/service.proto

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -380,16 +380,18 @@ service WorkflowService {
380380
rpc ListSchedules (ListSchedulesRequest) returns (ListSchedulesResponse) {
381381
}
382382

383-
// Allows users to specify a graph of worker build id based versions on a
384-
// per task queue basis. Versions are ordered, and may be either compatible
385-
// with some extant version, or a new incompatible version.
383+
// Allows users to specify sets of worker build id versions on a per task queue basis. Versions
384+
// are ordered, and may be either compatible with some extant version, or a new incompatible
385+
// version, forming sets of ids which are incompatible with each other, but whose contained
386+
// members are compatible with one another.
387+
//
386388
// (-- api-linter: core::0134::response-message-name=disabled
387389
// aip.dev/not-precedent: UpdateWorkerBuildIdOrdering RPC doesn't follow Google API format. --)
388390
// (-- api-linter: core::0134::method-signature=disabled
389391
// aip.dev/not-precedent: UpdateWorkerBuildIdOrdering RPC doesn't follow Google API format. --)
390-
rpc UpdateWorkerBuildIdOrdering (UpdateWorkerBuildIdOrderingRequest) returns (UpdateWorkerBuildIdOrderingResponse) {}
391-
// Fetches the worker build id versioning graph for some task queue.
392-
rpc GetWorkerBuildIdOrdering (GetWorkerBuildIdOrderingRequest) returns (GetWorkerBuildIdOrderingResponse) {}
392+
rpc UpdateWorkerBuildIdCompatability (UpdateWorkerBuildIdCompatabilityRequest) returns (UpdateWorkerBuildIdCompatabilityResponse) {}
393+
// Fetches the worker build id versioning sets for some task queue and related metadata.
394+
rpc GetWorkerBuildIdCompatability (GetWorkerBuildIdCompatabilityRequest) returns (GetWorkerBuildIdCompatabilityResponse) {}
393395

394396
// Invokes the specified update function on user workflow code.
395397
// (-- api-linter: core::0134=disabled

0 commit comments

Comments
 (0)