
Commit 4c1c26f

mikeee, antontroshin, and yaron2 authored
release: test upgrade/downgrade for 1.13/1.14/1.15 + mariner (#1491)
* release: test upgrade/downgrade for 1.13/1.14/1.15 + mariner
* fix: version skews
* Update tests/e2e/upgrade/upgrade_test.go
* Update tests/e2e/upgrade/upgrade_test.go
* Fix downgrade issue from 1.15 by deleting previous-version scheduler pods; update 1.15 RC to latest RC.18
* Fix downgrade 1.15 to 1.13 scenario with 0 scheduler pods
* Increase upgrade test timeout to 60m and update latest version to 1.15
* Fix httpendpoint tests cleanup and checks
* Make sure each matrix entry runs the appropriate tests; previously every matrix entry ran the same tests
* Skip TestKubernetesRunFile on HA
* Fix skip of TestKubernetesRunFile on HA
* Update to latest Dapr 1.15.2
* Add logs when waiting for pod deletion

---------

Signed-off-by: Mike Nguyen <[email protected]>
Signed-off-by: Yaron Schneider <[email protected]>
Signed-off-by: Anton Troshin <[email protected]>
Co-authored-by: Anton Troshin <[email protected]>
Co-authored-by: Yaron Schneider <[email protected]>
Co-authored-by: Anton Troshin <[email protected]>
1 parent a0921c7 commit 4c1c26f

File tree

8 files changed: +298 -58 lines changed


.github/workflows/kind_e2e.yaml

Lines changed: 2 additions & 0 deletions
@@ -173,6 +173,7 @@ jobs:
         export TEST_OUTPUT_FILE=$GITHUB_WORKSPACE/test-e2e-kind.json
         echo "TEST_OUTPUT_FILE=$TEST_OUTPUT_FILE" >> $GITHUB_ENV
         export GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
+        export TEST_DAPR_HA_MODE=${{ matrix.mode }}
         make e2e-build-run-k8s
       shell: bash
     - name: Run tests with Docker hub
@@ -181,6 +182,7 @@ jobs:
         export TEST_OUTPUT_FILE=$GITHUB_WORKSPACE/test-e2e-kind.json
         echo "TEST_OUTPUT_FILE=$TEST_OUTPUT_FILE" >> $GITHUB_ENV
         export GITHUB_TOKEN=${{ secrets.GITHUB_TOKEN }}
+        export TEST_DAPR_HA_MODE=${{ matrix.mode }}
         make e2e-build-run-k8s
       shell: bash
     - name: Upload test results

.github/workflows/upgrade_e2e.yaml

Lines changed: 2 additions & 0 deletions
@@ -137,13 +137,15 @@ jobs:
       run: |
         export TEST_OUTPUT_FILE=$GITHUB_WORKSPACE/test-e2e-upgrade-kind.json
         echo "TEST_OUTPUT_FILE=$TEST_OUTPUT_FILE" >> $GITHUB_ENV
+        export TEST_DAPR_HA_MODE=${{ matrix.mode }}
         make e2e-build-run-upgrade

     - name: Run tests with Docker hub
       if: github.event.schedule != '0 */6 * * *'
       run: |
         export TEST_OUTPUT_FILE=$GITHUB_WORKSPACE/test-e2e-upgrade-kind.json
         echo "TEST_OUTPUT_FILE=$TEST_OUTPUT_FILE" >> $GITHUB_ENV
+        export TEST_DAPR_HA_MODE=${{ matrix.mode }}
         make e2e-build-run-upgrade

     - name: Upload test results

Makefile

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ e2e-build-run-k8s: build test-e2e-k8s
 ################################################################################
 .PHONY: test-e2e-upgrade
 test-e2e-upgrade: test-deps
-	gotestsum --jsonfile $(TEST_OUTPUT_FILE) --format standard-verbose -- -timeout 40m -count=1 -tags=e2e ./tests/e2e/upgrade/...
+	gotestsum --jsonfile $(TEST_OUTPUT_FILE) --format standard-verbose -- -timeout 60m -count=1 -tags=e2e ./tests/e2e/upgrade/...

 ################################################################################
 # Build, E2E Tests for Kubernetes Upgrade #

pkg/kubernetes/upgrade.go

Lines changed: 110 additions & 0 deletions
@@ -14,6 +14,8 @@ limitations under the License.
 package kubernetes

 import (
+	"context"
+	"errors"
 	"fmt"
 	"net/http"
 	"os"
@@ -23,8 +25,11 @@ import (
 	helm "helm.sh/helm/v3/pkg/action"
 	"helm.sh/helm/v3/pkg/chart"
 	"helm.sh/helm/v3/pkg/release"
+	core_v1 "k8s.io/api/core/v1"
+	meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/helm/pkg/strvals"

+	"github.com/Masterminds/semver/v3"
 	"github.com/hashicorp/go-version"

 	"github.com/dapr/cli/pkg/print"
@@ -49,6 +54,8 @@ var crdsFullResources = []string{
 	"httpendpoints.dapr.io",
 }

+var versionWithHAScheduler = semver.MustParse("1.15.0-rc.1")
+
 type UpgradeConfig struct {
 	RuntimeVersion   string
 	DashboardVersion string
@@ -157,13 +164,40 @@ func Upgrade(conf UpgradeConfig) error {
 		return err
 	}

+	// used to signal the deletion of the scheduler pods only when downgrading from 1.15 to previous versions to handle incompatible changes
+	// in other cases the channel should be nil
+	var downgradeDeletionChan chan error
+
 	if !isDowngrade(conf.RuntimeVersion, daprVersion) {
 		err = applyCRDs("v" + conf.RuntimeVersion)
 		if err != nil {
 			return fmt.Errorf("unable to apply CRDs: %w", err)
 		}
 	} else {
 		print.InfoStatusEvent(os.Stdout, "Downgrade detected, skipping CRDs.")
+
+		targetVersion, errVersion := semver.NewVersion(conf.RuntimeVersion)
+		if errVersion != nil {
+			return fmt.Errorf("unable to parse dapr target version: %w", errVersion)
+		}
+
+		currentVersion, errVersion := semver.NewVersion(daprVersion)
+		if errVersion != nil {
+			return fmt.Errorf("unable to parse dapr current version: %w", errVersion)
+		}
+
+		if currentVersion.GreaterThanEqual(versionWithHAScheduler) && targetVersion.LessThan(versionWithHAScheduler) {
+			downgradeDeletionChan = make(chan error)
+			// Must delete all scheduler pods from cluster due to incompatible changes in version 1.15 with older versions.
+			go func() {
+				errDeletion := deleteSchedulerPods(status[0].Namespace, currentVersion, targetVersion)
+				if errDeletion != nil {
+					downgradeDeletionChan <- fmt.Errorf("failed to delete scheduler pods: %w", errDeletion)
+					print.FailureStatusEvent(os.Stderr, "Failed to delete scheduler pods: "+errDeletion.Error())
+				}
+				close(downgradeDeletionChan)
+			}()
+		}
 	}

 	chart, err := GetDaprHelmChartName(helmConf)
@@ -180,6 +214,15 @@ func Upgrade(conf UpgradeConfig) error {
 		return fmt.Errorf("failure while running upgrade: %w", err)
 	}

+	// wait for the deletion of the scheduler pods to finish
+	if downgradeDeletionChan != nil {
+		select {
+		case <-downgradeDeletionChan:
+		case <-time.After(3 * time.Minute):
+			return errors.New("timed out waiting for downgrade deletion")
+		}
+	}
+
 	if dashboardChart != nil {
 		if dashboardExists {
 			if _, err = upgradeClient.Run(dashboardReleaseName, dashboardChart, vals); err != nil {
@@ -202,6 +245,73 @@ func Upgrade(conf UpgradeConfig) error {
 	return nil
 }

+func deleteSchedulerPods(namespace string, currentVersion *semver.Version, targetVersion *semver.Version) error {
+	ctxWithTimeout, cancel := context.WithTimeout(context.Background(), time.Second*30)
+	defer cancel()
+
+	var pods *core_v1.PodList
+
+	// wait for at least one pod of the target version to be in the list before deleting the rest
+	// check the label app.kubernetes.io/version to determine the version of the pod
+	foundTargetVersion := false
+	for {
+		if foundTargetVersion {
+			break
+		}
+		k8sClient, err := Client()
+		if err != nil {
+			return err
+		}
+
+		pods, err = k8sClient.CoreV1().Pods(namespace).List(ctxWithTimeout, meta_v1.ListOptions{
+			LabelSelector: "app=dapr-scheduler-server",
+		})
+		if err != nil && !errors.Is(err, context.DeadlineExceeded) {
+			return err
+		}
+
+		if len(pods.Items) == 0 {
+			return nil
+		}
+
+		for _, pod := range pods.Items {
+			pv, ok := pod.Labels["app.kubernetes.io/version"]
+			if ok {
+				podVersion, err := semver.NewVersion(pv)
+				if err == nil && podVersion.Equal(targetVersion) {
+					foundTargetVersion = true
+					break
+				}
+			}
+		}
+		time.Sleep(5 * time.Second)
+	}

+	if pods == nil {
+		return errors.New("no scheduler pods found")
+	}
+
+	// get a fresh client to ensure we have the latest state of the cluster
+	k8sClient, err := Client()
+	if err != nil {
+		return err
+	}
+
+	// delete scheduler pods of the current version, i.e. >1.15.0
+	for _, pod := range pods.Items {
+		if pv, ok := pod.Labels["app.kubernetes.io/version"]; ok {
+			podVersion, err := semver.NewVersion(pv)
+			if err == nil && podVersion.Equal(currentVersion) {
+				err = k8sClient.CoreV1().Pods(namespace).Delete(ctxWithTimeout, pod.Name, meta_v1.DeleteOptions{})
+				if err != nil {
+					return fmt.Errorf("failed to delete pod %s during downgrade: %w", pod.Name, err)
+				}
+			}
+		}
+	}
+	return nil
+}
+
 // WithRetry enables retry with the specified max retries and retry interval.
 func WithRetry(maxRetries int, retryInterval time.Duration) UpgradeOption {
 	return func(o *UpgradeOptions) {
tests/e2e/common/common.go

Lines changed: 48 additions & 12 deletions
@@ -56,6 +56,9 @@ const (
 	thirdPartyDevNamespace = "default"
 	devRedisReleaseName    = "dapr-dev-redis"
 	devZipkinReleaseName   = "dapr-dev-zipkin"
+
+	DaprModeHA    = "ha"
+	DaprModeNonHA = "non-ha"
 )

 var (
@@ -120,6 +123,22 @@ func GetRuntimeVersion(t *testing.T, latest bool) *semver.Version {
 	return runtimeVersion
 }

+func GetDaprTestHaMode() string {
+	daprHaMode := os.Getenv("TEST_DAPR_HA_MODE")
+	if daprHaMode != "" {
+		return daprHaMode
+	}
+	return ""
+}
+
+func ShouldSkipTest(mode string) bool {
+	envDaprHaMode := GetDaprTestHaMode()
+	if envDaprHaMode != "" {
+		return envDaprHaMode != mode
+	}
+	return false
+}
+
 func UpgradeTest(details VersionDetails, opts TestOptions) func(t *testing.T) {
 	return func(t *testing.T) {
 		daprPath := GetDaprPath()
@@ -213,7 +232,7 @@ func GetTestsOnInstall(details VersionDetails, opts TestOptions) []TestCase {
 		{"clusterroles exist " + details.RuntimeVersion, ClusterRolesTest(details, opts)},
 		{"clusterrolebindings exist " + details.RuntimeVersion, ClusterRoleBindingsTest(details, opts)},
 		{"apply and check components exist " + details.RuntimeVersion, ComponentsTestOnInstallUpgrade(opts)},
-		{"apply and check httpendpoints exist " + details.RuntimeVersion, HTTPEndpointsTestOnInstallUpgrade(opts)},
+		{"apply and check httpendpoints exist " + details.RuntimeVersion, HTTPEndpointsTestOnInstallUpgrade(opts, TestOptions{})},
 		{"check mtls " + details.RuntimeVersion, MTLSTestOnInstallUpgrade(opts)},
 		{"status check " + details.RuntimeVersion, StatusTestOnInstallUpgrade(details, opts)},
 	}
@@ -341,10 +360,10 @@ func ComponentsTestOnInstallUpgrade(opts TestOptions) func(t *testing.T) {
 	}
 }

-func HTTPEndpointsTestOnInstallUpgrade(opts TestOptions) func(t *testing.T) {
+func HTTPEndpointsTestOnInstallUpgrade(installOpts TestOptions, upgradeOpts TestOptions) func(t *testing.T) {
 	return func(t *testing.T) {
 		// if dapr is installed with httpendpoints.
-		if opts.ApplyHTTPEndpointChanges {
+		if installOpts.ApplyHTTPEndpointChanges {
 			// apply any changes to the httpendpoint.
 			t.Log("apply httpendpoint changes")
 			output, err := spawn.Command("kubectl", "apply", "-f", "../testdata/namespace.yaml")
@@ -353,12 +372,17 @@ func HTTPEndpointsTestOnInstallUpgrade(opts TestOptions) func(t *testing.T) {
 			output, err = spawn.Command("kubectl", "apply", "-f", "../testdata/httpendpoint.yaml")
 			t.Log(output)
 			require.NoError(t, err, "expected no error on kubectl apply")
-			require.Equal(t, "httpendpoints.dapr.io/httpendpoint created\nhttpendpoints.dapr.io/httpendpoint created\n", output, "expected output to match")
-			httpEndpointOutputCheck(t, output)
+
+			if installOpts.ApplyHTTPEndpointChanges && upgradeOpts.ApplyHTTPEndpointChanges {
+				require.Equal(t, "httpendpoint.dapr.io/httpendpoint unchanged\n", output, "expected output to match")
+			} else {
+				require.Equal(t, "httpendpoint.dapr.io/httpendpoint created\n", output, "expected output to match")
+			}

 			t.Log("check applied httpendpoint exists")
-			_, err = spawn.Command("kubectl", "get", "httpendpoint")
+			output, err = spawn.Command("kubectl", "get", "httpendpoint")
 			require.NoError(t, err, "expected no error on calling to retrieve httpendpoints")
+			httpEndpointOutputCheck(t, output)
 		}
 	}
 }
@@ -984,7 +1008,7 @@ func componentOutputCheck(t *testing.T, opts TestOptions, output string) {
 		return
 	}

-	lines = strings.Split(output, "\n")[2:] // remove header and warning message.
+	lines = lines[2:] // remove header and warning message.

 	if opts.DevEnabled {
 		// default, test statestore.
@@ -1152,6 +1176,8 @@ func waitPodDeletionDev(t *testing.T, done, podsDeleted chan struct{}) {
 			devRedisReleaseName:  "dapr-dev-redis-master-",
 			devZipkinReleaseName: "dapr-dev-zipkin-",
 		}
+
+		t.Logf("dev pods waiting to be deleted: %d", len(list.Items))
 		for _, pod := range list.Items {
 			t.Log(pod.ObjectMeta.Name)
 			for component, prefix := range prefixes {
@@ -1169,7 +1195,7 @@ func waitPodDeletionDev(t *testing.T, done, podsDeleted chan struct{}) {
 		if len(found) == 2 {
 			podsDeleted <- struct{}{}
 		}
-		time.Sleep(15 * time.Second)
+		time.Sleep(10 * time.Second)
 	}
 }

@@ -1181,19 +1207,28 @@ func waitPodDeletion(t *testing.T, done, podsDeleted chan struct{}) {
 		default:
 			break
 		}
+
 		ctx := context.Background()
 		ctxt, cancel := context.WithTimeout(ctx, 10*time.Second)
 		defer cancel()
+
 		k8sClient, err := getClient()
 		require.NoError(t, err, "error getting k8s client for pods check")
+
 		list, err := k8sClient.CoreV1().Pods(DaprTestNamespace).List(ctxt, v1.ListOptions{
 			Limit: 100,
 		})
 		require.NoError(t, err, "error getting pods list from k8s")
+
 		if len(list.Items) == 0 {
 			podsDeleted <- struct{}{}
+		} else {
+			t.Logf("pods waiting to be deleted: %d", len(list.Items))
+			for _, pod := range list.Items {
+				t.Log(pod.ObjectMeta.Name)
+			}
 		}
-		time.Sleep(15 * time.Second)
+		time.Sleep(5 * time.Second)
 	}
 }

@@ -1214,7 +1249,8 @@ func waitAllPodsRunning(t *testing.T, namespace string, haEnabled bool, done, po
 			Limit: 100,
 		})
 		require.NoError(t, err, "error getting pods list from k8s")
-		t.Logf("items %d", len(list.Items))
+
+		t.Logf("waiting for pods to be running, current count: %d", len(list.Items))
 		countOfReadyPods := 0
 		for _, item := range list.Items {
 			t.Log(item.ObjectMeta.Name)
@@ -1235,11 +1271,11 @@ func waitAllPodsRunning(t *testing.T, namespace string, haEnabled bool, done, po
 		if err != nil {
 			t.Error(err)
 		}
-		if len(list.Items) == countOfReadyPods && ((haEnabled && countOfReadyPods == pods) || (!haEnabled && countOfReadyPods == pods)) {
+		if len(list.Items) == countOfReadyPods && countOfReadyPods == pods {
			podsRunning <- struct{}{}
 		}

-		time.Sleep(15 * time.Second)
+		time.Sleep(5 * time.Second)
 	}
 }

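The new DaprModeHA/DaprModeNonHA constants and the ShouldSkipTest helper let each workflow matrix entry run only the tests that match its exported TEST_DAPR_HA_MODE. A hedged sketch of how a test such as TestKubernetesRunFile (skipped on HA per the commit message) might consume the helper; the build tag, package name, and file placement are assumptions, since the test files themselves are not part of the hunks shown here:

```go
//go:build e2e

package kubernetes_test

import (
	"testing"

	"github.com/dapr/cli/tests/e2e/common"
)

func TestKubernetesRunFile(t *testing.T) {
	// Run only for the non-HA matrix entry; when TEST_DAPR_HA_MODE=ha the
	// helper reports a mode mismatch and the test is skipped.
	if common.ShouldSkipTest(common.DaprModeNonHA) {
		t.Skip("skipping TestKubernetesRunFile: TEST_DAPR_HA_MODE is set to a different mode")
	}
	// ... rest of the test body (unchanged by this commit) ...
}
```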