Skip to content

Commit f6a0781

Browse files
committed
Handle in-progress teardown when waiting for master
1 parent cb771eb commit f6a0781

File tree

1 file changed

+98
-20
lines changed

1 file changed

+98
-20
lines changed

pcmd.go

Lines changed: 98 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,7 @@ func main() {
4444
// require centralizing child process management, which doesn't seem worth
4545
// it so far. For now, we simply trust that all blocking operations will
4646
// cooperate and correctly implement the grace period logic.
47-
sig := <-signals
48-
fmt.Fprintf(os.Stderr, "Received %s signal, giving processes %d seconds to clean up...\n", sig, config.GracePeriod)
49-
47+
<-signals
5048
cancel()
5149
}()
5250

@@ -71,32 +69,64 @@ func lockOrExpectControlMaster(ctx context.Context, config Config) {
7169
if locked {
7270
pipeToProxyCommand(ctx, config)
7371
} else {
74-
// Wait for control path, then try a nested ssh connection
75-
fmt.Fprintf(os.Stderr, "Waiting for SSH ControlMaster...\n")
72+
fmt.Fprintf(os.Stderr, "Waiting up to %d seconds for SSH ControlMaster\n", 2*time.Duration(config.GracePeriod))
7673

7774
cancelLogTail, err := tailLogFileToStdErr(config)
7875
if err != nil {
7976
log.Fatal(err)
8077
}
8178
defer cancelLogTail()
8279

83-
controlMasterIsUp := waitForControlMaster(config)
84-
cancelLogTail()
80+
masterIsUp, cancelMasterWait := waitForControlMaster(config)
81+
82+
lockAcquired, cancelLockWait := waitForLock(config)
83+
84+
timeout := time.After(2 * time.Duration(config.GracePeriod) * time.Second)
85+
86+
select {
87+
// We acquired a lock, pipe to proxy command directly. This happens if we
88+
// started during the teardown phase. The ControlMaster will never come up
89+
// in that case, but we still have to wait for teardown to complete.
90+
case <-lockAcquired:
91+
cancelLogTail()
92+
cancelMasterWait()
93+
cancelLockWait()
94+
95+
pipeToProxyCommand(ctx, config)
96+
return
97+
98+
// The ControlMaster came up, create a new nested SSH session
99+
case <-masterIsUp:
100+
cancelLogTail()
101+
cancelMasterWait()
102+
cancelLockWait()
85103

86-
if controlMasterIsUp {
87104
sshPort := strconv.Itoa(config.SSHPort)
88105
sshUserAtHost := fmt.Sprintf("%s@%s", config.SSHUser, config.SSHHost)
89106
proxyTarget := fmt.Sprintf("localhost:%d", config.SSHPort)
90107
cmd := exec.Command("ssh", "-W", proxyTarget, "-p", sshPort, sshUserAtHost)
91108

92109
cmd.Stdin = os.Stdin
93110
cmd.Stdout = os.Stdout
94-
cmd.Stderr = os.Stderr
111+
cmd.Stderr = nil
95112

96113
cmd.Start()
97114
cmd.Wait()
98-
} else {
99-
fmt.Fprintf(os.Stderr, "ControlMaster not detected\n")
115+
116+
return
117+
118+
case <-timeout:
119+
cancelLogTail()
120+
cancelMasterWait()
121+
cancelLockWait()
122+
123+
fmt.Fprintf(os.Stderr, "Timeout waiting for ControlMaster\n")
124+
os.Exit(1)
125+
126+
case <-ctx.Done():
127+
cancelLogTail()
128+
cancelMasterWait()
129+
cancelLockWait()
100130
os.Exit(1)
101131
}
102132
}
@@ -192,6 +222,7 @@ func pipeToProxyCommand(ctx context.Context, config Config) {
192222
// forwarded to the interactive terminal (because its hella annoying to have
193223
// things printed to a bash session after a command appears to be finished)
194224
CleanupAndWaitForGracePeriod:
225+
fmt.Fprintf(os.Stderr, "Giving processes %d seconds to clean up\n", config.GracePeriod)
195226
cancelLogTail()
196227

197228
stdinPipe.Close()
@@ -306,19 +337,66 @@ func tailLogFileToStdErr(config Config) (cancel func() error, err error) {
306337
return cancel, nil
307338
}
308339

309-
func waitForControlMaster(config Config) bool {
310-
for attempts := 0; attempts < 400; attempts++ {
311-
attempts++
340+
func waitForControlMaster(config Config) (<-chan struct{}, func()) {
341+
done := make(chan struct{})
342+
cancelChan := make(chan struct{})
343+
344+
cancelFunc := func() {
345+
close(cancelChan)
346+
}
347+
348+
go func() {
349+
for {
350+
success := controlMasterIsUp(config)
351+
352+
if success {
353+
done <- struct{}{}
354+
return
355+
}
312356

313-
success := controlMasterIsUp(config)
357+
interval := time.After(250 * time.Millisecond)
314358

315-
if success {
316-
return success
359+
select {
360+
case <-interval:
361+
// do nothing
362+
case <-cancelChan:
363+
return
364+
}
317365
}
366+
}()
367+
368+
return done, cancelFunc
369+
}
370+
371+
func waitForLock(config Config) (<-chan struct{}, func()) {
372+
done := make(chan struct{})
373+
cancelChan := make(chan struct{})
318374

319-
time.Sleep(250 * time.Millisecond)
375+
cancelFunc := func() {
376+
close(cancelChan)
320377
}
321-
return false
378+
379+
go func() {
380+
for {
381+
_, locked := flockPath(config.LockFilePath)
382+
383+
if locked {
384+
done <- struct{}{}
385+
return
386+
}
387+
388+
interval := time.After(250 * time.Millisecond)
389+
390+
select {
391+
case <-interval:
392+
// do nothing
393+
case <-cancelChan:
394+
return
395+
}
396+
}
397+
}()
398+
399+
return done, cancelFunc
322400
}
323401

324402
func controlMasterIsUp(config Config) bool {
@@ -369,7 +447,7 @@ type Config struct {
369447
// SSH connection
370448
ExpectControlMaster bool
371449

372-
//Show the current version of PCMD
450+
//Show the current version of pcmd
373451
ShowVersion bool
374452

375453
// Derived options, calculated during parseConfig()

0 commit comments

Comments
 (0)