Skip to content

Commit 7183025

Browse files
authored
fix(tests): migrate NexusApiTestSuite to parallelsuite, fix versioning flag and query pagination (#9910)
## What changed?

Two test fixes targeting consistent CI failures across all recent runs, plus a migration of `NexusApiTestSuite` to `parallelsuite`.

## Why?

Both failures reproduced in 9–10 of the last 10 CI runs:

**1. `TestNexusStartOperation_WithNamespaceAndTaskQueue_SupportsVersioning`** (10/10 runs)

The test calls `UpdateWorkerBuildIdCompatibility`, which is the v0.1 (Version Set / Build ID compat) API gated by `FrontendEnableWorkerVersioningDataAPIs`. The test only enabled `FrontendEnableWorkerVersioningRuleAPIs` (the v0.2 Rules API), so the v0.1 call was rejected with a `PermissionDenied` error.

**2. `TestQueryWorkflow_NonStickyMultiPageHistory`** (9/10 runs)

The test asserted `NextPageToken != nil` after fetching workflow history. The shared functional test cluster sets `SendRawHistoryBetweenInternalServices=true` (in `tests/testcore/dynamic_config_overrides.go`), which causes the history service to return all events in one shot — making `NextPageToken` always nil regardless of the `MatchingHistoryMaxPageSize=2` override.

## How did you test it?

**Fix 1 — nexus versioning test (`tests/nexus_api_test.go`):**

Added `testcore.WithDynamicConfig(dynamicconfig.FrontendEnableWorkerVersioningDataAPIs, true)` to the `newNexusTestEnv` options, alongside the existing `RuleAPIs` setting (which is now passed as a `testcore.WithDynamicConfig` option as well, instead of through `env.OverrideDynamicConfig`), so the v0.1 API is available for the test.

Also migrated `NexusApiTestSuite` from the custom `NexusTestBaseSuite` (which used a `useTemporalFailures` field mutated before `suite.Run`) to `parallelsuite.Suite[*NexusApiTestSuite]`. The `useTemporalFailures` flag is now a typed parameter passed through `parallelsuite.Run`, eliminating shared mutable state between the two suite invocations.
**Fix 2 — query pagination test (`tests/query_workflow_test.go`):**

Extracted `TestQueryWorkflow_NonStickyMultiPageHistory` from `QueryWorkflowSuite` into a standalone top-level test that uses `testcore.NewEnv` with:

- `testcore.WithDedicatedCluster()` — isolated cluster not affected by the global config override
- `testcore.WithDynamicConfig(dynamicconfig.SendRawHistoryBetweenInternalServices, false)` — disables raw history so pagination happens normally
- `testcore.WithDynamicConfig(dynamicconfig.MatchingHistoryMaxPageSize, 2)` — forces multi-page history

Checklist:

- [x] built
- [x] added new functional test(s) (standalone query test)
- [x] covered by existing tests (nexus suite)

## Potential risks

The `NexusApiTestSuite` migration to `parallelsuite` changes how `useTemporalFailures` is threaded through tests. The two existing top-level runners (`TestNexusApiTestSuiteWithLegacyErrorPaths` / `WithTemporalFailures`) are preserved; the flag is now a compile-time-checked parameter rather than a struct field.
1 parent c20b4bb commit 7183025

3 files changed

Lines changed: 49 additions & 28 deletions

File tree

tests/nexus_api_test.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -610,8 +610,13 @@ func (s *NexusApiTestSuite) TestNexusCancelOperation_Outcomes(useTemporalFailure
610610
}
611611

612612
func (s *NexusApiTestSuite) TestNexusStartOperation_WithNamespaceAndTaskQueue_SupportsVersioning(useTemporalFailures bool) {
613-
env := newNexusTestEnv(s.T(), useTemporalFailures, testcore.WithDedicatedCluster())
614-
env.OverrideDynamicConfig(dynamicconfig.FrontendEnableWorkerVersioningRuleAPIs, true)
613+
env := newNexusTestEnv(s.T(), useTemporalFailures,
614+
testcore.WithDedicatedCluster(),
615+
testcore.WithDynamicConfig(dynamicconfig.FrontendEnableWorkerVersioningRuleAPIs, true),
616+
// UpdateWorkerBuildIdCompatibility is the v0.1 (Version Set-based) API gated by DataAPIs.
617+
testcore.WithDynamicConfig(dynamicconfig.FrontendEnableWorkerVersioningDataAPIs, true),
618+
)
619+
615620
ctx, cancel := context.WithCancel(testcore.NewContext())
616621
defer cancel()
617622
taskQueue := testcore.RandomizeStr("task-queue")

tests/nexus_test_base.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,13 @@ func (env *NexusTestEnv) createNexusEndpoint(t *testing.T, name string, taskQueu
4747
},
4848
})
4949
require.NoError(t, err)
50+
t.Cleanup(func() {
51+
// Delete the endpoint so the cluster can be safely reused by subsequent tests.
52+
_, _ = env.OperatorClient().DeleteNexusEndpoint(testcore.NewContext(), &operatorservice.DeleteNexusEndpointRequest{
53+
Id: resp.Endpoint.Id,
54+
Version: resp.Endpoint.Version,
55+
})
56+
})
5057
return resp.Endpoint
5158
}
5259

tests/query_workflow_test.go

Lines changed: 35 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -343,11 +343,18 @@ func (s *QueryWorkflowSuite) TestQueryWorkflow_ClosedWithoutWorkflowTaskStarted(
343343
// non-sticky query task poll is a valid HistoryContinuation token usable with
344344
// GetWorkflowExecutionHistory. Fails with "Invalid NextPageToken" if matching service
345345
// returns a RawHistoryContinuation token instead.
346-
func (s *QueryWorkflowSuite) TestQueryWorkflow_NonStickyMultiPageHistory() {
347-
// Small page size forces pagination in matching service.
348-
s.OverrideDynamicConfig(dynamicconfig.MatchingHistoryMaxPageSize, 2)
349-
350-
s.SdkWorker().Stop()
346+
//
347+
// Uses a dedicated cluster with MatchingHistoryMaxPageSize=2. With the default
348+
// SendRawHistoryBetweenInternalServices=true, the raw blob path paginates at the blob
349+
// level: ReadFullPageRawEvents stops after 2 blobs, leaving a non-empty PersistenceToken
350+
// even when all events fit in a single Cassandra logical page. This ensures NextPageToken
351+
// is non-empty, which is what we need to verify it's a valid HistoryContinuation token.
352+
func TestQueryWorkflow_NonStickyMultiPageHistory(t *testing.T) {
353+
t.Parallel()
354+
env := testcore.NewEnv(t,
355+
testcore.WithDedicatedCluster(),
356+
testcore.WithDynamicConfig(dynamicconfig.MatchingHistoryMaxPageSize, 2),
357+
)
351358

352359
activityFn := func(ctx context.Context) error { return nil }
353360
workflowFn := func(ctx workflow.Context) (string, error) {
@@ -365,27 +372,28 @@ func (s *QueryWorkflowSuite) TestQueryWorkflow_NonStickyMultiPageHistory() {
365372
return "done", nil
366373
}
367374

375+
tq := env.WorkerTaskQueue()
368376
id := "test-query-non-sticky-multi-page"
369-
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
377+
ctx, cancel := context.WithTimeout(env.Context(), 30*time.Second)
370378
defer cancel()
371379

372-
queryWorker := worker.New(s.SdkClient(), s.TaskQueue(), worker.Options{})
380+
queryWorker := worker.New(env.SdkClient(), tq, worker.Options{})
373381
queryWorker.RegisterWorkflow(workflowFn)
374382
queryWorker.RegisterActivity(activityFn)
375-
s.NoError(queryWorker.Start())
383+
env.NoError(queryWorker.Start())
376384

377-
workflowRun, err := s.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
385+
workflowRun, err := env.SdkClient().ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
378386
ID: id,
379-
TaskQueue: s.TaskQueue(),
387+
TaskQueue: tq,
380388
WorkflowRunTimeout: 20 * time.Second,
381389
}, workflowFn)
382-
s.NoError(err)
383-
s.NotNil(workflowRun)
390+
env.NoError(err)
391+
env.NotNil(workflowRun)
384392

385393
// Wait for all activities to complete, generating many event batches.
386-
s.Eventually(func() bool {
387-
resp, err := s.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{
388-
Namespace: s.Namespace().String(),
394+
env.Eventually(func() bool {
395+
resp, err := env.FrontendClient().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{
396+
Namespace: env.Namespace().String(),
389397
Execution: &commonpb.WorkflowExecution{WorkflowId: id},
390398
})
391399
return err == nil && resp.GetWorkflowExecutionInfo().GetHistoryLength() > 10
@@ -395,33 +403,34 @@ func (s *QueryWorkflowSuite) TestQueryWorkflow_NonStickyMultiPageHistory() {
395403
queryWorker.Stop()
396404

397405
// Issue a query in background; we'll poll for the task manually below.
398-
go func() { _, _ = s.SdkClient().QueryWorkflow(ctx, id, "", "test") }()
406+
// Don't assert inside the goroutine — it would panic if the test completes first.
407+
go func() { _, _ = env.SdkClient().QueryWorkflow(ctx, id, "", "test") }()
399408

400409
// Poll for the query task on the normal (non-sticky) task queue.
401410
var pollResp *workflowservice.PollWorkflowTaskQueueResponse
402-
s.Eventually(func() bool {
411+
env.Eventually(func() bool {
403412
pollCtx, pollCancel := context.WithTimeout(ctx, 3*time.Second)
404413
defer pollCancel()
405-
pollResp, err = s.FrontendClient().PollWorkflowTaskQueue(pollCtx, &workflowservice.PollWorkflowTaskQueueRequest{
406-
Namespace: s.Namespace().String(),
407-
TaskQueue: &taskqueuepb.TaskQueue{Name: s.TaskQueue(), Kind: enumspb.TASK_QUEUE_KIND_NORMAL},
414+
pollResp, err = env.FrontendClient().PollWorkflowTaskQueue(pollCtx, &workflowservice.PollWorkflowTaskQueueRequest{
415+
Namespace: env.Namespace().String(),
416+
TaskQueue: &taskqueuepb.TaskQueue{Name: tq, Kind: enumspb.TASK_QUEUE_KIND_NORMAL},
408417
Identity: "test-worker",
409418
})
410419
return err == nil && len(pollResp.GetTaskToken()) > 0
411420
}, 10*time.Second, 100*time.Millisecond)
412421

413-
s.NotNil(pollResp.GetHistory())
414-
s.NotEmpty(pollResp.GetNextPageToken(), "multi-page history should have NextPageToken")
422+
env.NotNil(pollResp.GetHistory())
423+
env.NotEmpty(pollResp.GetNextPageToken(), "multi-page history should have NextPageToken")
415424

416425
// Use the token with GetWorkflowExecutionHistory — this is what the worker SDK does.
417426
// Fails with "Invalid NextPageToken" if the token is a RawHistoryContinuation.
418-
histResp, err := s.FrontendClient().GetWorkflowExecutionHistory(ctx, &workflowservice.GetWorkflowExecutionHistoryRequest{
419-
Namespace: s.Namespace().String(),
427+
histResp, err := env.FrontendClient().GetWorkflowExecutionHistory(ctx, &workflowservice.GetWorkflowExecutionHistoryRequest{
428+
Namespace: env.Namespace().String(),
420429
Execution: &commonpb.WorkflowExecution{WorkflowId: id},
421430
NextPageToken: pollResp.GetNextPageToken(),
422431
})
423-
s.NoError(err)
424-
s.NotNil(histResp)
432+
env.NoError(err)
433+
env.NotNil(histResp)
425434
}
426435

427436
func (s *QueryWorkflowSuite) TestQueryWorkflow_FailurePropagated() {

0 commit comments

Comments
 (0)