Skip to content
Open
Show file tree
Hide file tree
Changes from 105 commits
Commits
Show all changes
119 commits
Select commit Hold shift + click to select a range
09fe843
fix(cosmovisor): make manual upgrades use halt-height
aaronc May 14, 2025
4087e2b
WIP on mock-node
aaronc May 15, 2025
5689795
WIP on watcher
aaronc May 16, 2025
c489524
poll watcher tests pass
aaronc May 21, 2025
2277ffb
data watcher tests
aaronc May 21, 2025
f0f5caf
notes
aaronc May 22, 2025
3f628b9
use common logic for add-batch-upgrade and add-upgrade
aaronc May 22, 2025
19858b3
refactoring watchers
aaronc May 22, 2025
496bba8
watcher initialization
aaronc May 22, 2025
fc37590
WIP on checkers
aaronc May 27, 2025
14124ea
WIP
aaronc May 28, 2025
7177cbb
working mock node
aaronc May 28, 2025
37f7c9f
switch to jsonpb
aaronc May 28, 2025
5a01495
WIP on refactoring
aaronc May 28, 2025
71c03a9
WIP on watchers
aaronc May 28, 2025
37ae97a
WIP
aaronc May 29, 2025
4745c63
WIP on state machine
aaronc May 29, 2025
ddc0e33
WIP on state machine diagrams
aaronc May 30, 2025
4953995
WIP on state machine
aaronc May 30, 2025
522f5a4
WIP on state machine
aaronc May 30, 2025
5622f47
simplify state machine
aaronc May 30, 2025
07545e8
WIP on runner
aaronc May 30, 2025
090cdfa
WIP on runner
aaronc May 30, 2025
869406b
WIP on runner
aaronc May 30, 2025
20fb950
WIP on runner
aaronc May 30, 2025
0681722
WIP on runner
aaronc May 30, 2025
3100d72
revert
aaronc May 30, 2025
d9ec079
WIP on test setup and run implementation
aaronc Jun 2, 2025
489b234
WIP on runner and test setup
aaronc Jun 2, 2025
be564e6
testing manual upgrades
aaronc Jun 2, 2025
cc3082f
WIP on upgrade flow
aaronc Jun 2, 2025
b7c813f
WIP on upgrade restart flow
aaronc Jun 2, 2025
8e83e45
WIP on upgrade restart flow
aaronc Jun 2, 2025
ac2e934
WIP on upgrade restart flow
aaronc Jun 2, 2025
a53bc4d
WIP on testing setup
aaronc Jun 3, 2025
b1b7eb4
WIP on testing setup
aaronc Jun 3, 2025
291e00f
fixes from testing
aaronc Jun 3, 2025
9445e42
WIP on testing
aaronc Jun 4, 2025
c700952
manual upgrade detection works
aaronc Jun 4, 2025
6a2f118
successful tests so far
aaronc Jun 4, 2025
cc0bbe6
working manual upgrade swapping
aaronc Jun 4, 2025
f0dac99
full test passes
aaronc Jun 4, 2025
100f756
WIP on more test conditions
aaronc Jun 4, 2025
68644ad
WIP on correct batch upgrade processing
aaronc Jun 5, 2025
0b87586
most upgrade tests working, shutdown isn't
aaronc Jun 5, 2025
427c526
shutdown works with some sleep time
aaronc Jun 5, 2025
bd0c895
integrate backoff manager
aaronc Jun 6, 2025
5a03e7c
backoff logging
aaronc Jun 6, 2025
638c317
delete refactored code, fix tests
aaronc Jun 6, 2025
990aeaf
migrate most existing tests, add backoff retry count
aaronc Jun 6, 2025
16e7a4b
existing tests migrated
aaronc Jun 6, 2025
fba2bac
remove dead test code
aaronc Jun 9, 2025
6cd1227
switch to known error for signaling upgrade completion
aaronc Jun 9, 2025
54d625b
simplify test setup
aaronc Jun 9, 2025
cb624f8
fix tests
aaronc Jun 9, 2025
d0d6e66
comments
aaronc Jun 9, 2025
d9affb9
add additional notes
aaronc Jun 9, 2025
94675f9
switch to just logging watcher errors
aaronc Jun 9, 2025
83ef052
switch to ErrorHandler interface
aaronc Jun 9, 2025
c9d0e11
sniffing for /block or /v1/block
aaronc Jun 9, 2025
7b128d2
refactor file deletion
aaronc Jun 9, 2025
b429d86
test case with node shutting down on upgrade
aaronc Jun 9, 2025
7d3c0a3
add additional test
aaronc Jun 9, 2025
7a6ece7
include test to check both json encodings work
aaronc Jun 9, 2025
85b75bd
make more code internal
aaronc Jun 10, 2025
06f8c53
update logs, cleanup
aaronc Jun 12, 2025
f971fa2
refactor show manual upgrades command, make config loading more consi…
aaronc Jun 12, 2025
e595bb9
document manual upgrade behavior
aaronc Jun 12, 2025
947c5f8
WIP on cosmovisor system tests
aaronc Jun 12, 2025
e1ccecc
WIP on cosmovisor system tests
aaronc Jun 12, 2025
8cbf93b
fix NPE error
aaronc Jun 13, 2025
822be0e
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/22731…
aaronc Jun 13, 2025
8ec6925
create separate cosmovisor systemtest
aaronc Jun 13, 2025
f9f5c62
logging fixes
aaronc Jun 13, 2025
e4e3feb
WIP on cosmovisor system tests
aaronc Jun 13, 2025
41be9b0
WIP on cosmovisor system tests
aaronc Jun 13, 2025
2ffffde
working cosmovisor system tests
aaronc Jun 16, 2025
f190de8
only start height watcher if we have a halt height set
aaronc Jun 24, 2025
896202f
remove completed TODOs
aaronc Jun 24, 2025
1e7cc72
update system test make task to include cosmovisor
aaronc Jun 25, 2025
2577a32
WIP on adding some non-determinism during manual upgrade
aaronc Jun 25, 2025
3175579
add a proper manual upgrade test
aaronc Jun 26, 2025
b8a4948
fix test
aaronc Jun 26, 2025
36fe230
don't delete upgrade-info.json file, instead check that the upgrade n…
aaronc Jun 26, 2025
2b82656
rename
aaronc Jun 30, 2025
0c5ad53
move shutdown go routine to run because that's where it's really rele…
aaronc Jun 30, 2025
0d07bce
address TODOs
aaronc Jun 30, 2025
9cbc260
remove TODO
aaronc Jun 30, 2025
f93a7cb
fix scanner_test.go
aaronc Jun 30, 2025
c9fb18f
fix bug
aaronc Jun 30, 2025
1aac5d1
refactor process runner to better handle cases where process could ha…
aaronc Jun 30, 2025
03e7c72
add TODOs
aaronc Jun 30, 2025
7d2d1c6
move everything to v2
aaronc Jun 30, 2025
726288e
update CHANGELOG.md
aaronc Jun 30, 2025
7cd0292
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/22731…
aaronc Jun 30, 2025
b7721c8
go mod tidy
aaronc Jun 30, 2025
28e2e68
update CHANGELOG.md's
aaronc Jun 30, 2025
9cd07c0
check cosmovisor symlinks, confirm upgrade info readable
aaronc Jun 30, 2025
3122f9e
switch to pointer
aaronc Jun 30, 2025
c0efa1d
add comments
aaronc Jun 30, 2025
53032d4
revert CHANGELOG.md reformatting, remove old RELEASE_NOTES.md
aaronc Jun 30, 2025
2b3a5d7
add more state breakage to manual upgrade
aaronc Jun 30, 2025
42fb734
fix x/upgrade tests
aaronc Jun 30, 2025
1ffada5
fix systemtests
aaronc Jun 30, 2025
1ea885b
lint-fix, go mod tidy, cleanup
aaronc Jun 30, 2025
c005143
check that upgrade handlers are called
aaronc Jun 30, 2025
1cf11cd
only set env when we're using cosmovisor
aaronc Jun 30, 2025
0183068
Update tools/cosmovisor/internal/watchers/fsnotify_watcher.go
aaronc Jul 1, 2025
61ad5e9
fix code suggestion
aaronc Jul 1, 2025
f08259a
fix test isolation
aaronc Jul 7, 2025
dfbcffd
Merge branch 'main' into aaronc/22731-cosmovisor-fixes
aaronc Jul 7, 2025
9113c9d
isolate all system tests
aaronc Jul 7, 2025
49f0ea7
Merge remote-tracking branch 'origin/aaronc/22731-cosmovisor-fixes' i…
aaronc Jul 7, 2025
be003aa
go mod tidy
aaronc Jul 7, 2025
e2b2fce
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
03c5cac
fix comments
aaronc Jul 7, 2025
857e809
fail on unexpected callback count
aaronc Jul 7, 2025
a92aba1
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
0fac9ff
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -505,13 +505,21 @@ localnet-debug: localnet-stop localnet-build-dlv localnet-build-nodes

.PHONY: localnet-start localnet-stop localnet-debug localnet-build-env localnet-build-dlv localnet-build-nodes

test-system: build-v53 build
# build-system-test-current builds the binaries necessary for running system tests, but only those on the current branch
# this is useful if you are iterating on tests which rely on changes to the current branch only (which is most common in development)
build-system-test-current: build cosmovisor
mkdir -p ./tests/systemtests/binaries/
cp $(BUILDDIR)/simd ./tests/systemtests/binaries/
cp tools/cosmovisor/cosmovisor ./tests/systemtests/binaries/

# build-system-test builds the binaries necessary for running system tests and places them in the correct locations
build-system-test: build-system-test-current build-v53
mkdir -p ./tests/systemtests/binaries/v0.53
mv $(BUILDDIR)/simdv53 ./tests/systemtests/binaries/v0.53/simd

test-system: build-system-test
$(MAKE) -C tests/systemtests test
.PHONY: test-system
.PHONY: build-system-test-current build-system-test test-system

# build-v53 checks out the v0.53.x branch, builds the binary, and renames it to simdv53.
build-v53:
Expand Down
37 changes: 36 additions & 1 deletion simapp/upgrades.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
package simapp

import (
"bytes"
"context"
"fmt"
"os"

"github.com/cosmos/gogoproto/jsonpb"

storetypes "cosmossdk.io/store/types"

Expand All @@ -16,7 +21,10 @@ import (
// NOTE: This upgrade defines a reference implementation of what an upgrade
// could look like when an application is migrating from Cosmos SDK version
// v0.53.x to v0.54.x.
const UpgradeName = "v053-to-v054"
const (
// UpgradeName is the name of the reference v0.53.x to v0.54.x upgrade.
UpgradeName = "v053-to-v054"
// ManualUpgradeName is the name under which the handler for the additional,
// manually performed upgrade is registered.
ManualUpgradeName = "manual1"
)

func (app SimApp) RegisterUpgradeHandlers() {
app.UpgradeKeeper.SetUpgradeHandler(
Expand All @@ -26,11 +34,38 @@ func (app SimApp) RegisterUpgradeHandlers() {
return app.ModuleManager.RunMigrations(ctx, app.Configurator(), fromVM)
},
)
// we add another upgrade, to be performed manually which does some small state breakage
app.UpgradeKeeper.SetUpgradeHandler(
ManualUpgradeName,
func(ctx context.Context, plan upgradetypes.Plan, fromVM module.VersionMap) (module.VersionMap, error) {
// do some minimal state breaking update
err := app.GovKeeper.Constitution.Set(ctx,
fmt.Sprintf("we have expected upgrade %q and that's now our constitution", plan.Name))
return fromVM, err
},
)

// we check that we can read the upgrade info from disk, which is necessary for setting store key upgrades
upgradeInfo, err := app.UpgradeKeeper.ReadUpgradeInfoFromDisk()
if err != nil {
panic(err)
}
if upgradeInfo.Name != "" {
app.Logger().Info("read upgrade info from disk", "upgrade_info", upgradeInfo)
}

// this allows testing stateful manual upgrades with Cosmovisor
if manualUpgradeVar, ok := os.LookupEnv("SIMAPP_MANUAL_UPGRADE"); ok {
var manualUpgrade upgradetypes.Plan
err := (&jsonpb.Unmarshaler{}).Unmarshal(bytes.NewBufferString(manualUpgradeVar), &manualUpgrade)
if err != nil {
panic("invalid SIMAPP_MANUAL_UPGRADE: " + err.Error())
}
err = app.UpgradeKeeper.SetManualUpgrade(&manualUpgrade)
if err != nil {
panic("failed to set manual upgrade: " + err.Error())
}
}

if upgradeInfo.Name == UpgradeName && !app.UpgradeKeeper.IsSkipHeight(upgradeInfo.Height) {
storeUpgrades := storetypes.StoreUpgrades{
Expand Down
80 changes: 75 additions & 5 deletions systemtests/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,24 @@

// StartChain starts the chain without Cosmovisor. It delegates to doStartChain
// with useCosmovisor set to false; xargs are forwarded unchanged.
func (s *SystemUnderTest) StartChain(t *testing.T, xargs ...string) {
t.Helper()
s.doStartChain(t, false, xargs...)
}

// StartChainWithCosmovisor starts the chain wrapping its execution with Cosmovisor.
// It delegates to doStartChain with useCosmovisor set to true; xargs are
// forwarded unchanged.
func (s *SystemUnderTest) StartChainWithCosmovisor(t *testing.T, xargs ...string) {
t.Helper()
s.doStartChain(t, true, xargs...)
}

func (s *SystemUnderTest) doStartChain(t *testing.T, useCosmovisor bool, xargs ...string) {
t.Helper()
if useCosmovisor {
s.initCosmovisor(t)
}
s.Log("Start chain\n")
s.ChainStarted = true
// HACK: force db_backend
s.startNodesAsync(t, append([]string{"start", "--log_level=info", "--log_no_color", "--db_backend=goleveldb"}, xargs...)...)
s.startNodesAsync(t, useCosmovisor, append([]string{"start", "--log_level=info", "--log_no_color", "--db_backend=goleveldb"}, xargs...)...)

s.AwaitNodeUp(t, s.rpcAddr)

Expand All @@ -195,6 +209,48 @@
s.AwaitNextBlock(t, 10e9)
}

// cosmovisorEnv builds the environment entries passed to Cosmovisor for a node:
// DAEMON_HOME is set to the absolute form of home and DAEMON_NAME to
// s.projectName. The test fails if home cannot be made absolute.
func (s *SystemUnderTest) cosmovisorEnv(t *testing.T, home string) []string {
	t.Helper()
	daemonHome, absErr := filepath.Abs(home)
	require.NoError(t, absErr)
	homeEntry := fmt.Sprintf("DAEMON_HOME=%s", daemonHome)
	nameEntry := fmt.Sprintf("DAEMON_NAME=%s", s.projectName)
	return []string{homeEntry, nameEntry}
}

// cosmovisorPath returns the path of the Cosmovisor binary, located at
// binaries/cosmovisor beneath WorkDir.
func (s *SystemUnderTest) cosmovisorPath() string {
	binariesDir := filepath.Join(WorkDir, "binaries")
	return filepath.Join(binariesDir, "cosmovisor")
}

// ExecCosmovisor executes the Cosmovisor binary with the given arguments
// for each node in the network with the home directory set properly for each node.
func (s *SystemUnderTest) ExecCosmovisor(t *testing.T, async bool, args ...string) {

Check failure on line 228 in systemtests/system.go

View workflow job for this annotation

GitHub Actions / golangci-lint

test helper function should start from t.Helper() (thelper)

Check failure on line 228 in systemtests/system.go

View workflow job for this annotation

GitHub Actions / Analyze

test helper function should start from t.Helper() (thelper)
s.withEachNodeHome(func(i int, home string) {
env := s.cosmovisorEnv(t, home)
t.Logf("Calling Cosmovisor with args %+v and env %+v", args, env)
cmd := exec.Command(

Check failure on line 232 in systemtests/system.go

View workflow job for this annotation

GitHub Actions / golangci-lint

G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)

Check failure on line 232 in systemtests/system.go

View workflow job for this annotation

GitHub Actions / Analyze

G204: Subprocess launched with a potential tainted input or cmd arguments (gosec)
s.cosmovisorPath(),
args...,
)
Comment on lines +232 to +235

Check failure

Code scanning / gosec

Subprocess launched with variable Error

Subprocess launched with a potential tainted input or cmd arguments
cmd.Dir = WorkDir
env = append(env, "COSMOVISOR_COLOR_LOGS=false")
cmd.Env = env
if async {
require.NoError(t, cmd.Start(), "cosmovisor init %d", i)
s.awaitProcessCleanup(cmd)
} else {
require.NoError(t, cmd.Run(), "cosmovisor init %d", i)
}
})
}

// initCosmovisor runs `cosmovisor init <binary>` synchronously through
// ExecCosmovisor, where <binary> is the path locateExecutable resolves for
// s.execBinary.
func (s *SystemUnderTest) initCosmovisor(t *testing.T) {
	t.Helper()
	s.ExecCosmovisor(t, false, "init", locateExecutable(s.execBinary))
}

// MarkDirty whole chain will be reset when marked dirty
func (s *SystemUnderTest) MarkDirty() {
s.dirty = true
Expand Down Expand Up @@ -591,16 +647,30 @@
}

// startNodesAsync runs the given app cli command for all cluster nodes and returns without waiting
func (s *SystemUnderTest) startNodesAsync(t *testing.T, xargs ...string) {
func (s *SystemUnderTest) startNodesAsync(t *testing.T, useCosmovisor bool, xargs ...string) {
t.Helper()
s.withEachNodeHome(func(i int, home string) {
args := append(xargs, "--home="+home)
absHome, err := filepath.Abs(home)
require.NoError(t, err, "failed to get absolute home path")
args := append(xargs, "--home="+absHome)
var binary string
var env []string
if useCosmovisor {
binary = s.cosmovisorPath()
args = append([]string{"run"}, args...) // cosmovisor run <args>
cfgPath := filepath.Join(absHome, "cosmovisor", "config.toml")
args = append(args, "--cosmovisor-config", cfgPath)
env = s.cosmovisorEnv(t, absHome)
} else {
binary = locateExecutable(s.execBinary)
}
s.Logf("Execute `%s %s`\n", s.execBinary, strings.Join(args, " "))
cmd := exec.Command( //nolint:gosec // used by tests only
locateExecutable(s.execBinary),
cmd := exec.Command(
binary,
args...,
)

Check failure

Code scanning / gosec

Subprocess launched with variable Error

Subprocess launched with variable
cmd.Dir = WorkDir
cmd.Env = env
s.watchLogs(i, cmd)
require.NoError(t, cmd.Start(), "node %d", i)
s.Logf("Node started: %d\n", cmd.Process.Pid)
Expand Down
Loading
Loading