Skip to content
Open
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
119 commits
Select commit Hold shift + click to select a range
09fe843
fix(cosmovisor): make manual upgrades use halt-height
aaronc May 14, 2025
4087e2b
WIP on mock-node
aaronc May 15, 2025
5689795
WIP on watcher
aaronc May 16, 2025
c489524
poll watcher tests pass
aaronc May 21, 2025
2277ffb
data watcher tests
aaronc May 21, 2025
f0f5caf
notes
aaronc May 22, 2025
3f628b9
use common logic for add-batch-upgrade and add-upgrade
aaronc May 22, 2025
19858b3
refactoring watchers
aaronc May 22, 2025
496bba8
watcher initialization
aaronc May 22, 2025
fc37590
WIP on checkers
aaronc May 27, 2025
14124ea
WIP
aaronc May 28, 2025
7177cbb
working mock node
aaronc May 28, 2025
37f7c9f
switch to jsonpb
aaronc May 28, 2025
5a01495
WIP on refactoring
aaronc May 28, 2025
71c03a9
WIP on watchers
aaronc May 28, 2025
37ae97a
WIP
aaronc May 29, 2025
4745c63
WIP on state machine
aaronc May 29, 2025
ddc0e33
WIP on state machine diagrams
aaronc May 30, 2025
4953995
WIP on state machine
aaronc May 30, 2025
522f5a4
WIP on state machine
aaronc May 30, 2025
5622f47
simplify state machine
aaronc May 30, 2025
07545e8
WIP on runner
aaronc May 30, 2025
090cdfa
WIP on runner
aaronc May 30, 2025
869406b
WIP on runner
aaronc May 30, 2025
20fb950
WIP on runner
aaronc May 30, 2025
0681722
WIP on runner
aaronc May 30, 2025
3100d72
revert
aaronc May 30, 2025
d9ec079
WIP on test setup and run implementation
aaronc Jun 2, 2025
489b234
WIP on runner and test setup
aaronc Jun 2, 2025
be564e6
testing manual upgrades
aaronc Jun 2, 2025
cc3082f
WIP on upgrade flow
aaronc Jun 2, 2025
b7c813f
WIP on upgrade restart flow
aaronc Jun 2, 2025
8e83e45
WIP on upgrade restart flow
aaronc Jun 2, 2025
ac2e934
WIP on upgrade restart flow
aaronc Jun 2, 2025
a53bc4d
WIP on testing setup
aaronc Jun 3, 2025
b1b7eb4
WIP on testing setup
aaronc Jun 3, 2025
291e00f
fixes from testing
aaronc Jun 3, 2025
9445e42
WIP on testing
aaronc Jun 4, 2025
c700952
manual upgrade detection works
aaronc Jun 4, 2025
6a2f118
successful tests so far
aaronc Jun 4, 2025
cc0bbe6
working manual upgrade swapping
aaronc Jun 4, 2025
f0dac99
full test passes
aaronc Jun 4, 2025
100f756
WIP on more test conditions
aaronc Jun 4, 2025
68644ad
WIP on correct batch upgrade processing
aaronc Jun 5, 2025
0b87586
most upgrade tests working, shutdown isn't
aaronc Jun 5, 2025
427c526
shutdown works with some sleep time
aaronc Jun 5, 2025
bd0c895
integrate backoff manager
aaronc Jun 6, 2025
5a03e7c
backoff logging
aaronc Jun 6, 2025
638c317
delete refactored code, fix tests
aaronc Jun 6, 2025
990aeaf
migrate most existing tests, add backoff retry count
aaronc Jun 6, 2025
16e7a4b
existing tests migrated
aaronc Jun 6, 2025
fba2bac
remove dead test code
aaronc Jun 9, 2025
6cd1227
switch to known error for signaling upgrade completion
aaronc Jun 9, 2025
54d625b
simplify test setup
aaronc Jun 9, 2025
cb624f8
fix tests
aaronc Jun 9, 2025
d0d6e66
comments
aaronc Jun 9, 2025
d9affb9
add additional notes
aaronc Jun 9, 2025
94675f9
switch to just logging watcher errors
aaronc Jun 9, 2025
83ef052
switch to ErrorHandler interface
aaronc Jun 9, 2025
c9d0e11
sniffing for /block or /v1/block
aaronc Jun 9, 2025
7b128d2
refactor file deletion
aaronc Jun 9, 2025
b429d86
test case with node shutting down on upgrade
aaronc Jun 9, 2025
7d3c0a3
add additional test
aaronc Jun 9, 2025
7a6ece7
include test to check both json encodings work
aaronc Jun 9, 2025
85b75bd
make more code internal
aaronc Jun 10, 2025
06f8c53
update logs, cleanup
aaronc Jun 12, 2025
f971fa2
refactor show manual upgrades command, make config loading more consi…
aaronc Jun 12, 2025
e595bb9
document manual upgrade behavior
aaronc Jun 12, 2025
947c5f8
WIP on cosmovisor system tests
aaronc Jun 12, 2025
e1ccecc
WIP on cosmovisor system tests
aaronc Jun 12, 2025
8cbf93b
fix NPE error
aaronc Jun 13, 2025
822be0e
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/22731…
aaronc Jun 13, 2025
8ec6925
create separate cosmovisor systemtest
aaronc Jun 13, 2025
f9f5c62
logging fixes
aaronc Jun 13, 2025
e4e3feb
WIP on cosmovisor system tests
aaronc Jun 13, 2025
41be9b0
WIP on cosmovisor system tests
aaronc Jun 13, 2025
2ffffde
working cosmovisor system tests
aaronc Jun 16, 2025
f190de8
only start height watcher if we have a halt height set
aaronc Jun 24, 2025
896202f
remove completed TODOs
aaronc Jun 24, 2025
1e7cc72
update system test make task to include cosmovisor
aaronc Jun 25, 2025
2577a32
WIP on adding some non-determinism during manual upgrade
aaronc Jun 25, 2025
3175579
add a proper manual upgrade test
aaronc Jun 26, 2025
b8a4948
fix test
aaronc Jun 26, 2025
36fe230
don't delete upgrade-info.json file, instead check that the upgrade n…
aaronc Jun 26, 2025
2b82656
rename
aaronc Jun 30, 2025
0c5ad53
move shutdown go routine to run because that's where it's really rele…
aaronc Jun 30, 2025
0d07bce
address TODOs
aaronc Jun 30, 2025
9cbc260
remove TODO
aaronc Jun 30, 2025
f93a7cb
fix scanner_test.go
aaronc Jun 30, 2025
c9fb18f
fix bug
aaronc Jun 30, 2025
1aac5d1
refactor process runner to better handle cases where process could ha…
aaronc Jun 30, 2025
03e7c72
add TODOs
aaronc Jun 30, 2025
7d2d1c6
move everything to v2
aaronc Jun 30, 2025
726288e
update CHANGELOG.md
aaronc Jun 30, 2025
7cd0292
Merge branch 'main' of github.com:cosmos/cosmos-sdk into aaronc/22731…
aaronc Jun 30, 2025
b7721c8
go mod tidy
aaronc Jun 30, 2025
28e2e68
update CHANGELOG.md's
aaronc Jun 30, 2025
9cd07c0
check cosmovisor symlinks, confirm upgrade info readable
aaronc Jun 30, 2025
3122f9e
switch to pointer
aaronc Jun 30, 2025
c0efa1d
add comments
aaronc Jun 30, 2025
53032d4
revert CHANGELOG.md reformatting, remove old RELEASE_NOTES.md
aaronc Jun 30, 2025
2b3a5d7
add more state breakage to manual upgrade
aaronc Jun 30, 2025
42fb734
fix x/upgrade tests
aaronc Jun 30, 2025
1ffada5
fix systemtests
aaronc Jun 30, 2025
1ea885b
lint-fix, go mod tidy, cleanup
aaronc Jun 30, 2025
c005143
check that upgrade handlers are called
aaronc Jun 30, 2025
1cf11cd
only set env when we're using cosmovisor
aaronc Jun 30, 2025
0183068
Update tools/cosmovisor/internal/watchers/fsnotify_watcher.go
aaronc Jul 1, 2025
61ad5e9
fix code suggestion
aaronc Jul 1, 2025
f08259a
fix test isolation
aaronc Jul 7, 2025
dfbcffd
Merge branch 'main' into aaronc/22731-cosmovisor-fixes
aaronc Jul 7, 2025
9113c9d
isolate all system tests
aaronc Jul 7, 2025
49f0ea7
Merge remote-tracking branch 'origin/aaronc/22731-cosmovisor-fixes' i…
aaronc Jul 7, 2025
be003aa
go mod tidy
aaronc Jul 7, 2025
e2b2fce
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
03c5cac
fix comments
aaronc Jul 7, 2025
857e809
fail on unexpected callback count
aaronc Jul 7, 2025
a92aba1
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
0fac9ff
attempt to fix cosmovisor tests
aaronc Jul 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions tools/cosmovisor/TEST_SCENARIOS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
- basic: start node, get upgrade-info.json, upgrade
- manual upgrade added while running:
  - start node
  - get upgrade-info.json.batch
  - restart with halt height
  - reach halt height
  - upgrade
- manual upgrade added while running but get upgrade-info.json:
  - start node
  - get upgrade-info.json.batch
  - restart with halt height
  - get upgrade-info.json
  - upgrade
  - restart with halt height
  - reach halt height
  - upgrade
- start with halt height
80 changes: 54 additions & 26 deletions tools/cosmovisor/args.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package cosmovisor

import (
"encoding/json"
"bytes"
"errors"
"fmt"
"io"
Expand All @@ -12,6 +12,7 @@ import (
"strings"
"time"

"github.com/cosmos/gogoproto/jsonpb"
"github.com/pelletier/go-toml/v2"
"github.com/spf13/viper"

Expand Down Expand Up @@ -71,9 +72,6 @@ type Config struct {
TimeFormatLogs string `toml:"cosmovisor_timeformat_logs" mapstructure:"cosmovisor_timeformat_logs" default:"kitchen"`
CustomPreUpgrade string `toml:"cosmovisor_custom_preupgrade" mapstructure:"cosmovisor_custom_preupgrade" default:""`
DisableRecase bool `toml:"cosmovisor_disable_recase" mapstructure:"cosmovisor_disable_recase" default:"false"`

// currently running upgrade
currentUpgrade upgradetypes.Plan
}

// Root returns the root directory where all info lives
Expand Down Expand Up @@ -107,9 +105,14 @@ func (cfg *Config) BaseUpgradeDir() string {
return filepath.Join(cfg.Root(), upgradesDir)
}

// UpgradeInfoDir returns the "data" directory under cfg.Home, which is where
// `x/upgrade/keeper` is expected to create upgrade-info.json.
func (cfg *Config) UpgradeInfoDir() string {
	const dataDirName = "data"
	return filepath.Join(cfg.Home, dataDirName)
}

// UpgradeInfoFilePath is the expected upgrade-info filename created by `x/upgrade/keeper`.
// It is the upgrade-info file name joined onto the directory returned by UpgradeInfoDir.
func (cfg *Config) UpgradeInfoFilePath() string {
	return filepath.Join(cfg.UpgradeInfoDir(), upgradetypes.UpgradeInfoFilename)
}

// UpgradeInfoBatchFilePath is the same as UpgradeInfoFilePath but with a batch suffix.
Expand Down Expand Up @@ -408,7 +411,6 @@ func (cfg *Config) SetCurrentUpgrade(u upgradetypes.Plan) (rerr error) {
return fmt.Errorf("creating current symlink: %w", err)
}

cfg.currentUpgrade = u
f, err := os.Create(filepath.Join(cfg.Root(), upgrade, upgradetypes.UpgradeInfoFilename))
if err != nil {
return err
Expand All @@ -420,39 +422,63 @@ func (cfg *Config) SetCurrentUpgrade(u upgradetypes.Plan) (rerr error) {
}
}()

bz, err := json.Marshal(u)
out, err := (&jsonpb.Marshaler{}).MarshalToString(&u)
if err != nil {
return err
}
_, err = f.Write(bz)
_, err = f.Write([]byte(out))
return err
}

// UpgradeInfo returns the current upgrade info
func (cfg *Config) UpgradeInfo() (upgradetypes.Plan, error) {
if cfg.currentUpgrade.Name != "" {
return cfg.currentUpgrade, nil
}

filename := filepath.Join(cfg.Root(), currentLink, upgradetypes.UpgradeInfoFilename)
filename := cfg.UpgradeInfoFilePath()
fmt.Printf("Reading upgrade info from %q\n", filename)
_, err := os.Lstat(filename)
var u upgradetypes.Plan
var bz []byte
if err != nil { // no current directory
goto returnError
return upgradetypes.Plan{}, fmt.Errorf("failed to read %q: %w", filename, err)
}
if bz, err = os.ReadFile(filename); err != nil {
goto returnError
return upgradetypes.Plan{}, fmt.Errorf("failed to read %q: %w", filename, err)
}
if err = json.Unmarshal(bz, &u); err != nil {
goto returnError
return cfg.ParseUpgradeInfo(bz)
}

// ParseUpgradeInfo decodes bz as a jsonpb-encoded upgrade plan and validates it.
//
// The plan must unmarshal cleanly and pass ValidateBasic; otherwise a zero Plan
// and a wrapped error are returned. Unless cfg.DisableRecase is set, the name
// of the returned plan is lower-cased.
func (cfg *Config) ParseUpgradeInfo(bz []byte) (upgradetypes.Plan, error) {
	var upgradePlan upgradetypes.Plan
	if err := jsonpb.Unmarshal(bytes.NewReader(bz), &upgradePlan); err != nil {
		return upgradetypes.Plan{}, fmt.Errorf("error unmarshalling upgrade info: %w", err)
	}
	if err := upgradePlan.ValidateBasic(); err != nil {
		return upgradetypes.Plan{}, fmt.Errorf("upgrade info failed validation: %w", err)
	}
	// Recasing happens after validation, so the plan is validated with the
	// name exactly as it appears in bz.
	if !cfg.DisableRecase {
		upgradePlan.Name = strings.ToLower(upgradePlan.Name)
	}
	return upgradePlan, nil
}

// LastKnownHeightFile is the name of the file, located in the directory
// returned by Config.UpgradeInfoDir, that ReadLastKnownHeight and
// WriteLastKnownHeight use to store a block height as a base-10 integer.
const LastKnownHeightFile = ".last_known_height"

func (cfg Config) ReadLastKnownHeight() uint64 {
filename := filepath.Join(cfg.UpgradeInfoDir(), LastKnownHeightFile)
bz, err := os.ReadFile(filename)

Check failure

Code scanning / gosec

Potential file inclusion via variable Error

Potential file inclusion via variable
if err != nil {
return 0
}

h, err := strconv.ParseUint(string(bz), 10, 64)
if err != nil {
return 0
}

return h
}

returnError:
cfg.currentUpgrade.Name = "_"
return cfg.currentUpgrade, fmt.Errorf("failed to read %q: %w", filename, err)
// WriteLastKnownHeight stores height as a base-10 integer in
// LastKnownHeightFile inside the upgrade-info directory, replacing any
// previous contents. The file is written with mode 0644. Any error from
// os.WriteFile is returned unchanged.
func (cfg Config) WriteLastKnownHeight(height uint64) error {
	encoded := strconv.AppendUint(nil, height, 10)
	target := filepath.Join(cfg.UpgradeInfoDir(), LastKnownHeightFile)
	return os.WriteFile(target, encoded, 0o644)
}

// BooleanOption checks and validate env option
Expand Down Expand Up @@ -575,7 +601,7 @@ func (cfg Config) DetailString() string {
var sb strings.Builder
sb.WriteString("Configurable Values:\n")
for _, kv := range configEntries {
fmt.Fprintf(&sb, " %s: %s\n", kv.name, kv.value)
_, _ = fmt.Fprintf(&sb, " %s: %s\n", kv.name, kv.value)
}
sb.WriteString("Derived Values:\n")
dnl := 0
Expand All @@ -586,7 +612,7 @@ func (cfg Config) DetailString() string {
}
dFmt := fmt.Sprintf(" %%%ds: %%s\n", dnl)
for _, kv := range derivedEntries {
fmt.Fprintf(&sb, dFmt, kv.name, kv.value)
_, _ = fmt.Fprintf(&sb, dFmt, kv.name, kv.value)
}
return sb.String()
}
Expand Down Expand Up @@ -614,7 +640,9 @@ func (cfg Config) Export() (string, error) {
// convert the time value to its format option
cfg.TimeFormatLogs = ValueToTimeFormatOption(cfg.TimeFormatLogs)

defer file.Close()
defer func(file *os.File) {
_ = file.Close()
}(file)

// write the configuration to the file
err = toml.NewEncoder(file).Encode(cfg)
Expand Down
29 changes: 12 additions & 17 deletions tools/cosmovisor/cmd/cosmovisor/add_upgrade.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package main

import (
"encoding/json"
"fmt"
"os"
"path"
Expand All @@ -10,8 +9,6 @@ import (
"github.com/spf13/cobra"

"cosmossdk.io/tools/cosmovisor"

upgradetypes "github.com/cosmos/cosmos-sdk/x/upgrade/types"
)

func NewAddUpgradeCmd() *cobra.Command {
Expand All @@ -23,14 +20,15 @@ func NewAddUpgradeCmd() *cobra.Command {
RunE: addUpgradeCmd,
}

addUpgrade.Flags().Bool(cosmovisor.FlagForce, false, "overwrite existing upgrade binary / upgrade-info.json file")
addUpgrade.Flags().Bool(cosmovisor.FlagForce, false, "overwrite existing upgrade binary and plan with the same name")
addUpgrade.Flags().Int64(cosmovisor.FlagUpgradeHeight, 0, "define a height at which to upgrade the binary automatically (without governance proposal)")

return addUpgrade
}

// addUpgrade adds upgrade info to manifest
func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgradeName, executablePath, upgradeInfoPath string) error {
// TODO batch-upgrade and add-upgrade should write to the same batch file
func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgradeName, executablePath string) error {
logger := cfg.Logger(os.Stdout)

if !cfg.DisableRecase {
Expand Down Expand Up @@ -65,22 +63,19 @@ func addUpgrade(cfg *cosmovisor.Config, force bool, upgradeHeight int64, upgrade
logger.Info(fmt.Sprintf("Upgrade binary located at %s", cfg.UpgradeBin(upgradeName)))

if upgradeHeight > 0 {
plan := upgradetypes.Plan{Name: upgradeName, Height: upgradeHeight}
if err := plan.ValidateBasic(); err != nil {
panic(fmt.Errorf("something is wrong with cosmovisor: %w", err))
plan := &cosmovisor.ManualUpgradePlan{
Name: upgradeName,
Height: upgradeHeight,
}

// create upgrade-info.json file
planData, err := json.Marshal(plan)
if err != nil {
return fmt.Errorf("failed to marshal upgrade plan: %w", err)
if err := plan.ValidateBasic(); err != nil {
panic(fmt.Errorf("invalid manual upgrade plan: %w", err))
}

if err := saveOrAbort(upgradeInfoPath, planData, force); err != nil {
return err
if err := cosmovisor.AddManualUpgrade(cfg, plan, force); err != nil {
panic(fmt.Errorf("failed to add manual upgrade: %w", err))
}

logger.Info(fmt.Sprintf("%s created, %s upgrade binary will switch at height %d", upgradeInfoPath, upgradeName, upgradeHeight))
logger.Info(fmt.Sprintf("added manual upgrade, node will be set to halt at height %d, and binary for upgrade %q will be activated", upgradeHeight, upgradeName))
}

return nil
Expand Down Expand Up @@ -119,7 +114,7 @@ func addUpgradeCmd(cmd *cobra.Command, args []string) error {
return fmt.Errorf("failed to get upgrade-height flag: %w", err)
}

return addUpgrade(cfg, force, upgradeHeight, upgradeName, executablePath, cfg.UpgradeInfoFilePath())
return addUpgrade(cfg, force, upgradeHeight, upgradeName, executablePath)
}

// saveOrAbort saves data to path or aborts if file exists and force is false
Expand Down
44 changes: 7 additions & 37 deletions tools/cosmovisor/cmd/cosmovisor/batch_upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package main

import (
"encoding/csv"
"encoding/json"
"fmt"
"os"
"path/filepath"
Expand Down Expand Up @@ -49,7 +48,7 @@ cosmovisor add-batch-upgrade --upgrade-file /path/to/batch_upgrade.csv`,

// addBatchUpgrade takes in multiple specified upgrades and creates a single
// batch upgrade file out of them
func addBatchUpgrade(cmd *cobra.Command, args []string) error {
func addBatchUpgrade(cmd *cobra.Command, _ []string) error {
cfg, err := getConfigFromCmd(cmd)
if err != nil {
return err
Expand All @@ -71,7 +70,6 @@ func addBatchUpgrade(cmd *cobra.Command, args []string) error {

// processUpgradeList takes in a list of upgrades and creates a batch upgrade file
func processUpgradeList(cfg *cosmovisor.Config, upgradeList [][]string) error {
upgradeInfoPaths := []string{}
for i, upgrade := range upgradeList {
if len(upgrade) != 3 {
return fmt.Errorf("argument at position %d (%s) is invalid", i, upgrade)
Expand All @@ -82,42 +80,12 @@ func processUpgradeList(cfg *cosmovisor.Config, upgradeList [][]string) error {
if err != nil {
return fmt.Errorf("upgrade height at position %d (%s) is invalid", i, upgrade[2])
}
upgradeInfoPath := cfg.UpgradeInfoFilePath() + "." + upgradeName
upgradeInfoPaths = append(upgradeInfoPaths, upgradeInfoPath)
if err := addUpgrade(cfg, true, upgradeHeight, upgradeName, upgradePath, upgradeInfoPath); err != nil {
return err
}
}

var allData []json.RawMessage
for _, uip := range upgradeInfoPaths {
fileData, err := os.ReadFile(uip)
if err != nil {
return fmt.Errorf("error reading file %s: %w", uip, err)
}

// Verify it's valid JSON
var jsonData json.RawMessage
if err := json.Unmarshal(fileData, &jsonData); err != nil {
return fmt.Errorf("error parsing JSON from file %s: %w", uip, err)
// we use the same logic as the add-upgrade command here, appending to any existing manual upgrade data
if err := addUpgrade(cfg, true, upgradeHeight, upgradeName, upgradePath); err != nil {
return err
}

// Add to our slice
allData = append(allData, jsonData)
}

// Marshal the combined data
batchData, err := json.MarshalIndent(allData, "", " ")
if err != nil {
return fmt.Errorf("error marshaling combined JSON: %w", err)
}

// Write to output file
err = os.WriteFile(cfg.UpgradeInfoBatchFilePath(), batchData, 0o600)
if err != nil {
return fmt.Errorf("error writing combined JSON to file: %w", err)
}

return nil
}

Expand All @@ -127,7 +95,9 @@ func processUpgradeFile(cfg *cosmovisor.Config, upgradeFile string) error {
if err != nil {
return fmt.Errorf("error opening upgrade CSV file %s: %w", upgradeFile, err)
}
defer file.Close()
defer func(file *os.File) {
_ = file.Close()
}(file)

r := csv.NewReader(file)
r.FieldsPerRecord = 3
Expand Down
1 change: 1 addition & 0 deletions tools/cosmovisor/cmd/cosmovisor/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/spf13/cobra"

"cosmossdk.io/log"

"cosmossdk.io/tools/cosmovisor"

"github.com/cosmos/cosmos-sdk/x/upgrade/plan"
Expand Down
5 changes: 4 additions & 1 deletion tools/cosmovisor/cmd/cosmovisor/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@ package main
import (
"context"
"os"
"os/signal"
"syscall"
)

func main() {
if err := NewRootCmd().ExecuteContext(context.Background()); err != nil {
ctx, _ := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
if err := NewRootCmd().ExecuteContext(ctx); err != nil {
os.Exit(1)
}
}
Loading
Loading