Skip to content

Commit 0f174ee

Browse files
Ehco1996 and claude committed
refactor(updater): drop in-process state machine; UI polls /version
The systemd restart kills this process before any "restarting/done" state can be polled, so the 5-state machine produced UIs stuck on "Downloading" while the update actually succeeded. Drop the state tracking entirely:

- updater.Apply no longer takes onState; it runs to completion or returns an error.
- /api/v1/update/apply is fire-and-forget (HTTP 202); /update/status is removed along with JobStatus and updateJob.
- Dashboard polls /api/v1/version every 2s after the click; success is detected when git_revision changes; a 60s timeout surfaces a "check journalctl" hint.
- Failure path stays in the s.l logger -> journalctl, same as before.

Net -193 lines.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent b98ab90 commit 0f174ee

7 files changed

Lines changed: 201 additions & 394 deletions

File tree

internal/cli/update.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ var UpdateCMD = &cli.Command{
2424
Channel: c.String("channel"),
2525
Force: c.Bool("force"),
2626
Restart: !c.Bool("no-restart"),
27-
}, constant.Version, constant.GitRevision, cliLogger, nil)
27+
}, constant.Version, constant.GitRevision, cliLogger)
2828
},
2929
}

internal/updater/updater.go

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -29,18 +29,6 @@ const (
2929
systemdServiceName = "ehco"
3030
)
3131

32-
// State is the phase of an Apply run; consumed by the web UI.
33-
type State string
34-
35-
const (
36-
StateChecking State = "checking"
37-
StateDownloading State = "downloading"
38-
StateInstalling State = "installing"
39-
StateRestarting State = "restarting"
40-
StateDone State = "done"
41-
StateFailed State = "failed"
42-
)
43-
4432
// CheckResult describes a release relative to the running binary.
4533
type CheckResult struct {
4634
Channel string `json:"channel"`
@@ -110,16 +98,12 @@ func Check(ctx context.Context, channel, currentVersion, currentRevision string)
11098
return res, nil
11199
}
112100

113-
// Apply downloads + swaps + (optionally) restarts. Each phase is reported
114-
// to onState so the dashboard can render progress; CLI passes nil.
115-
func Apply(ctx context.Context, opts ApplyOptions, currentVersion, currentRevision string, log *zap.SugaredLogger, onState func(State)) error {
116-
emit := func(s State) {
117-
if onState != nil {
118-
onState(s)
119-
}
120-
}
121-
122-
emit(StateChecking)
101+
// Apply downloads + swaps + (optionally) restarts. Errors are returned
102+
// for the caller to surface; phase tracking has been intentionally
103+
// dropped — the systemd restart kills this process before any
104+
// "restarting/done" state could be polled, making intermediate states
105+
// unreliable. The dashboard polls /version to detect completion.
106+
func Apply(ctx context.Context, opts ApplyOptions, currentVersion, currentRevision string, log *zap.SugaredLogger) error {
123107
resolved, rel, err := pickRelease(ctx, opts.Channel, currentVersion)
124108
if err != nil {
125109
return err
@@ -134,7 +118,6 @@ func Apply(ctx context.Context, opts ApplyOptions, currentVersion, currentRevisi
134118
rel.TagName, currentRevision)
135119
} else {
136120
log.Info("already up to date")
137-
emit(StateDone)
138121
return nil
139122
}
140123
} else if compareVersions(latest, currentVersion) < 0 {
@@ -156,14 +139,12 @@ func Apply(ctx context.Context, opts ApplyOptions, currentVersion, currentRevisi
156139
}
157140
tmpPath := binPath + ".new"
158141

159-
emit(StateDownloading)
160142
log.Infof("downloading %s -> %s", asset.BrowserDownloadURL, tmpPath)
161143
if err := download(ctx, asset.BrowserDownloadURL, tmpPath); err != nil {
162144
_ = os.Remove(tmpPath)
163145
return fmt.Errorf("download: %w", err)
164146
}
165147

166-
emit(StateInstalling)
167148
// rename(2) over a running ELF on linux is safe: the kernel keeps the
168149
// old inode alive for the running process while new invocations
169150
// resolve to the new file.
@@ -179,15 +160,9 @@ func Apply(ctx context.Context, opts ApplyOptions, currentVersion, currentRevisi
179160

180161
if !opts.Restart {
181162
log.Info("skipping restart; restart manually to pick up the new binary")
182-
emit(StateDone)
183163
return nil
184164
}
185-
emit(StateRestarting)
186-
if err := restartSystemd(log); err != nil {
187-
return err
188-
}
189-
emit(StateDone)
190-
return nil
165+
return restartSystemd(log)
191166
}
192167

193168
func pickRelease(ctx context.Context, channel, currentVersion string) (string, *ghRelease, error) {

internal/web/handler_update.go

Lines changed: 12 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -23,18 +23,6 @@ type VersionInfo struct {
2323
GoArch string `json:"go_arch"`
2424
}
2525

26-
// JobStatus is the in-memory record of the most-recent update attempt.
27-
// Process-local on purpose: after a successful restart the new process
28-
// boots with no record, the SPA reloads /version and sees the new build.
29-
type JobStatus struct {
30-
State updater.State `json:"state"`
31-
Channel string `json:"channel"`
32-
From string `json:"from"`
33-
To string `json:"to"`
34-
StartedAt time.Time `json:"started_at"`
35-
Error string `json:"error,omitempty"`
36-
}
37-
3826
func (s *Server) Version(c echo.Context) error {
3927
return c.JSON(http.StatusOK, VersionInfo{
4028
Version: constant.Version,
@@ -61,6 +49,10 @@ func (s *Server) UpdateCheck(c echo.Context) error {
6149
return c.JSON(http.StatusOK, res)
6250
}
6351

52+
// UpdateApply kicks off the update in a detached goroutine and returns
53+
// immediately. The dashboard polls /version to detect completion (the
54+
// running process restarts mid-flow, so any in-process state machine is
55+
// inherently lossy). Failures are logged via s.l; check journalctl.
6456
func (s *Server) UpdateApply(c echo.Context) error {
6557
if runtime.GOOS != "linux" {
6658
return echo.NewHTTPError(http.StatusBadRequest,
@@ -73,58 +65,14 @@ func (s *Server) UpdateApply(c echo.Context) error {
7365
if opts.Channel == "" {
7466
opts.Channel = updater.ChannelAuto
7567
}
76-
77-
prev := s.updateJob.Load()
78-
if prev != nil && isInProgress(prev.State) {
79-
return echo.NewHTTPError(http.StatusConflict, "another update is already running")
80-
}
81-
82-
job := &JobStatus{
83-
State: updater.StateChecking,
84-
Channel: opts.Channel,
85-
From: constant.Version,
86-
StartedAt: time.Now().UTC(),
87-
}
88-
s.updateJob.Store(job)
8968
s.l.Infof("update apply requested channel=%s force=%v restart=%v", opts.Channel, opts.Force, opts.Restart)
9069

91-
// Detached context: closing the browser shouldn't abort an in-flight swap.
92-
go s.runUpdate(opts, job)
93-
return c.JSON(http.StatusAccepted, map[string]string{"state": string(updater.StateChecking)})
94-
}
95-
96-
func (s *Server) runUpdate(opts updater.ApplyOptions, job *JobStatus) {
97-
ctx, cancel := context.WithTimeout(context.Background(), updateApplyTimeout)
98-
defer cancel()
99-
100-
onState := func(st updater.State) {
101-
// Copy-on-write so /status readers always see a consistent snapshot.
102-
next := *job
103-
next.State = st
104-
s.updateJob.Store(&next)
105-
*job = next
106-
}
107-
108-
if err := updater.Apply(ctx, opts, constant.Version, constant.GitRevision, s.l, onState); err != nil {
109-
next := *job
110-
next.State = updater.StateFailed
111-
next.Error = err.Error()
112-
s.updateJob.Store(&next)
113-
s.l.Errorf("update failed: %v", err)
114-
}
115-
}
116-
117-
func (s *Server) UpdateStatus(c echo.Context) error {
118-
if j := s.updateJob.Load(); j != nil {
119-
return c.JSON(http.StatusOK, j)
120-
}
121-
return c.JSON(http.StatusOK, map[string]string{"state": "idle"})
122-
}
123-
124-
func isInProgress(s updater.State) bool {
125-
switch s {
126-
case updater.StateChecking, updater.StateDownloading, updater.StateInstalling, updater.StateRestarting:
127-
return true
128-
}
129-
return false
70+
go func() {
71+
ctx, cancel := context.WithTimeout(context.Background(), updateApplyTimeout)
72+
defer cancel()
73+
if err := updater.Apply(ctx, opts, constant.Version, constant.GitRevision, s.l); err != nil {
74+
s.l.Errorf("update failed: %v", err)
75+
}
76+
}()
77+
return c.NoContent(http.StatusAccepted)
13078
}

internal/web/server.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,7 @@ type Server struct {
3232
cfg *config.Config
3333
auth *authenticator
3434

35-
connMgr cmgr.Cmgr
36-
updateJob atomic.Pointer[JobStatus]
35+
connMgr cmgr.Cmgr
3736

3837
// xrayStatus is wired post-construction by cli boot once the
3938
// XrayServer exists. Always read via Load() — may be nil when
@@ -126,7 +125,6 @@ func setupRoutes(s *Server) {
126125
api.GET("/version", s.Version)
127126
api.GET("/update/check", s.UpdateCheck)
128127
api.POST("/update/apply", s.UpdateApply)
129-
api.GET("/update/status", s.UpdateStatus)
130128

131129
// Local SQLite store: read-side health snapshot + maintenance ops.
132130
// All four mutations are auth-gated through the api group's

internal/web/webui/src/api/client.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ import type {
3535
QueryNodeMetricsResp,
3636
VersionInfo,
3737
UpdateCheck,
38-
UpdateStatus,
3938
UpdateApplyOptions,
4039
OverviewResp,
4140
DBHealth,
@@ -77,12 +76,11 @@ export const api = {
7776
updateCheck: (channel: string) =>
7877
request<UpdateCheck>(`/api/v1/update/check?channel=${encodeURIComponent(channel)}`),
7978
updateApply: (opts: UpdateApplyOptions) =>
80-
request<{ state: string }>("/api/v1/update/apply", {
79+
request<void>("/api/v1/update/apply", {
8180
method: "POST",
8281
headers: { "Content-Type": "application/json" },
8382
body: JSON.stringify(opts),
8483
}),
85-
updateStatus: () => request<UpdateStatus>("/api/v1/update/status"),
8684
dbHealth: () => request<DBHealth>("/api/v1/db/health"),
8785
dbCleanup: (older_than_days: number) =>
8886
request<DBMaintenanceResult>("/api/v1/db/cleanup", {

internal/web/webui/src/api/types.ts

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -122,24 +122,6 @@ export interface UpdateCheck {
122122
asset_url: string;
123123
}
124124

125-
export type UpdateState =
126-
| "idle"
127-
| "checking"
128-
| "downloading"
129-
| "installing"
130-
| "restarting"
131-
| "done"
132-
| "failed";
133-
134-
export interface UpdateStatus {
135-
state: UpdateState;
136-
channel?: string;
137-
from?: string;
138-
to?: string;
139-
started_at?: string;
140-
error?: string;
141-
}
142-
143125
export interface UpdateApplyOptions {
144126
channel: string;
145127
force: boolean;

0 commit comments

Comments
 (0)