
Commit c91c9b7

Merge pull request shipwright-io#56 from gabemontero/fix-follow-no-pod
various run build -F/--follow fixes around timeout, lack of pods, and data races
2 parents 4831482 + 71ba9a0 commit c91c9b7

17 files changed (+374, -101 lines)

pkg/shp/cmd/build/create.go (+1, -1)

@@ -35,7 +35,7 @@ func (c *CreateCommand) Cmd() *cobra.Command {
 }
 
 // Complete fills internal subcommand structure for future work with user input
-func (c *CreateCommand) Complete(params *params.Params, args []string) error {
+func (c *CreateCommand) Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error {
 	switch len(args) {
 	case 1:
 		c.name = args[0]

pkg/shp/cmd/build/delete.go (+1, -1)

@@ -41,7 +41,7 @@ func (c *DeleteCommand) Cmd() *cobra.Command {
 }
 
 // Complete fills DeleteSubCommand structure with data obtained from cobra command
-func (c *DeleteCommand) Complete(params *params.Params, args []string) error {
+func (c *DeleteCommand) Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error {
 	c.name = args[0]
 
 	return nil

pkg/shp/cmd/build/list.go (+1, -1)

@@ -39,7 +39,7 @@ func (c *ListCommand) Cmd() *cobra.Command {
 }
 
 // Complete fills object with user input data
-func (c *ListCommand) Complete(params *params.Params, args []string) error {
+func (c *ListCommand) Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error {
 	return nil
 }

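Note on the Complete signature change: the same one-line edit repeats across create.go, delete.go, list.go, cancel.go, and run.go, which suggests the shared runner.SubCommand contract now threads the IOStreams through Complete instead of handing them out only at Run time. A rough reconstruction of what that interface likely looks like after this commit (only the methods visible in these diffs are listed; the real definition lives in pkg/shp/cmd/runner):

package runner

import (
    "github.com/shipwright-io/cli/pkg/shp/params"
    "github.com/spf13/cobra"
    "k8s.io/cli-runtime/pkg/genericclioptions"
)

// SubCommand is implemented by each "shp" subcommand (create, delete, list, run, cancel).
// Hypothetical sketch: the method set is inferred from the diffs in this commit.
type SubCommand interface {
    // Cmd exposes the underlying cobra command.
    Cmd() *cobra.Command
    // Complete now also receives the IOStreams, so a subcommand can capture the
    // output stream while it parses arguments, before Run is invoked.
    Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error
    // Run executes the subcommand against the cluster.
    Run(params *params.Params, ioStreams *genericclioptions.IOStreams) error
}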
pkg/shp/cmd/build/run.go (+85, -33)

@@ -32,12 +32,14 @@ type RunCommand struct {
 	logTail         *tail.Tail      // follow container logs
 	tailLogsStarted map[string]bool // controls tail instance per container
 
-	buildName    string // build name
+	logLock sync.Mutex
+
+	buildName    string
 	buildRunName string
+	namespace    string
 	buildRunSpec *buildv1alpha1.BuildRunSpec // stores command-line flags
 	shpClientset buildclientset.Interface
 	follow       bool // flag to tail pod logs
-	watchLock    sync.Mutex
 }
 
 const buildRunLongDesc = `

@@ -53,7 +55,7 @@ func (r *RunCommand) Cmd() *cobra.Command {
 }
 
 // Complete picks the build resource name from arguments, and instantiate additional components.
-func (r *RunCommand) Complete(params *params.Params, args []string) error {
+func (r *RunCommand) Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error {
 	switch len(args) {
 	case 1:
 		r.buildName = args[0]

@@ -66,6 +68,31 @@ func (r *RunCommand) Complete(params *params.Params, args []string) error {
 		return err
 	}
 	r.logTail = tail.NewTail(r.Cmd().Context(), clientset)
+	r.ioStreams = io
+	r.namespace = params.Namespace()
+	if r.follow {
+		if r.shpClientset, err = params.ShipwrightClientSet(); err != nil {
+			return err
+		}
+
+		kclientset, err := params.ClientSet()
+		if err != nil {
+			return err
+		}
+		to, err := params.RequestTimeout()
+		if err != nil {
+			return err
+		}
+		r.pw, err = reactor.NewPodWatcher(r.Cmd().Context(), to, kclientset, params.Namespace())
+		if err != nil {
+			return err
+		}
+
+		r.pw.WithOnPodModifiedFn(r.onEvent)
+		r.pw.WithTimeoutPodFn(r.onTimeout)
+		r.pw.WithNoPodEventsYetFn(r.onNoPodEventsYet)
+
+	}
 
 	// overwriting build-ref name to use what's on arguments
 	return r.Cmd().Flags().Set(flags.BuildrefNameFlag, r.buildName)

@@ -92,11 +119,49 @@ func (r *RunCommand) tailLogs(pod *corev1.Pod) {
 	}
 }
 
+// onNoPodEventsYet reacts to the pod watcher telling us it has not received any pod events for our build run
+func (r *RunCommand) onNoPodEventsYet() {
+	r.Log(fmt.Sprintf("BuildRun %q log following has not observed any pod events yet.", r.buildRunName))
+	br, err := r.shpClientset.ShipwrightV1alpha1().BuildRuns(r.namespace).Get(r.cmd.Context(), r.buildRunName, metav1.GetOptions{})
+	if err != nil {
+		r.Log(fmt.Sprintf("error accessing BuildRun %q: %s", r.buildRunName, err.Error()))
+		return
+	}
+
+	c := br.Status.GetCondition(buildv1alpha1.Succeeded)
+	giveUp := false
+	msg := ""
+	switch {
+	case c != nil && c.Status == corev1.ConditionTrue:
+		giveUp = true
+		msg = fmt.Sprintf("BuildRun '%s' has been marked as successful.\n", br.Name)
+	case c != nil && c.Status == corev1.ConditionFalse:
+		giveUp = true
+		msg = fmt.Sprintf("BuildRun '%s' has been marked as failed.\n", br.Name)
+	case br.IsCanceled():
+		giveUp = true
+		msg = fmt.Sprintf("BuildRun '%s' has been canceled.\n", br.Name)
+	case br.DeletionTimestamp != nil:
+		giveUp = true
+		msg = fmt.Sprintf("BuildRun '%s' has been deleted.\n", br.Name)
+	case !br.HasStarted():
+		r.Log(fmt.Sprintf("BuildRun '%s' has been marked as failed.\n", br.Name))
+	}
+	if giveUp {
+		r.Log(msg)
+		r.Log(fmt.Sprintf("exiting 'ship build run --follow' for BuildRun %q", br.Name))
+		r.stop()
+	}
+
+}
+
+// onTimeout reacts to either the context or request timeout causing the pod watcher to exit
+func (r *RunCommand) onTimeout(msg string) {
+	r.Log(fmt.Sprintf("BuildRun %q log following has stopped because: %q\n", r.buildRunName, msg))
+}
+
 // onEvent reacts on pod state changes, to start and stop tailing container logs.
 func (r *RunCommand) onEvent(pod *corev1.Pod) error {
-	// found more data races during unit testing with concurrent events coming in
-	r.watchLock.Lock()
-	defer r.watchLock.Unlock()
 	switch pod.Status.Phase {
 	case corev1.PodRunning:
 		// graceful time to wait for container start

@@ -118,14 +183,14 @@ func (r *RunCommand) onEvent(pod *corev1.Pod) error {
 			err = fmt.Errorf("build pod '%s' has failed", pod.GetName())
 		}
 		// see if because of deletion or cancelation
-		fmt.Fprintf(r.ioStreams.Out, msg)
+		r.Log(msg)
 		r.stop()
 		return err
 	case corev1.PodSucceeded:
-		fmt.Fprintf(r.ioStreams.Out, "Pod '%s' has succeeded!\n", pod.GetName())
+		r.Log(fmt.Sprintf("Pod '%s' has succeeded!\n", pod.GetName()))
 		r.stop()
 	default:
-		fmt.Fprintf(r.ioStreams.Out, "Pod '%s' is in state %q...\n", pod.GetName(), string(pod.Status.Phase))
+		r.Log(fmt.Sprintf("Pod '%s' is in state %q...\n", pod.GetName(), string(pod.Status.Phase)))
 		// handle any issues with pulling images that may fail
 		for _, c := range pod.Status.Conditions {
 			if c.Type == corev1.PodInitialized || c.Type == corev1.ContainersReady {

@@ -146,10 +211,7 @@ func (r *RunCommand) stop() {
 
 // Run creates a BuildRun resource based on Build's name informed on arguments.
 func (r *RunCommand) Run(params *params.Params, ioStreams *genericclioptions.IOStreams) error {
-	// ran into some data race conditions during unit test with this starting up, but pod events
-	// coming in before we completed initialization below
-	r.watchLock.Lock()
-	// resource using GenerateName, which will provice a unique instance
+	// resource using GenerateName, which will provide a unique instance
 	br := &buildv1alpha1.BuildRun{
 		ObjectMeta: metav1.ObjectMeta{
 			GenerateName: fmt.Sprintf("%s-", r.buildName),

@@ -162,7 +224,7 @@ func (r *RunCommand) Run(params *params.Params, ioStreams *genericclioptions.IOS
 	if err != nil {
 		return err
 	}
-	br, err = clientset.ShipwrightV1alpha1().BuildRuns(params.Namespace()).Create(r.cmd.Context(), br, metav1.CreateOptions{})
+	br, err = clientset.ShipwrightV1alpha1().BuildRuns(r.namespace).Create(r.cmd.Context(), br, metav1.CreateOptions{})
 	if err != nil {
 		return err
 	}

@@ -172,15 +234,7 @@ func (r *RunCommand) Run(params *params.Params, ioStreams *genericclioptions.IOS
 		return nil
 	}
 
-	r.ioStreams = ioStreams
-	kclientset, err := params.ClientSet()
-	if err != nil {
-		return err
-	}
 	r.buildRunName = br.Name
-	if r.shpClientset, err = params.ShipwrightClientSet(); err != nil {
-		return err
-	}
 
 	// instantiating a pod watcher with a specific label-selector to find the indented pod where the
 	// actual build started by this subcommand is being executed, including the randomized buildrun

@@ -190,19 +244,17 @@ func (r *RunCommand) Run(params *params.Params, ioStreams *genericclioptions.IOS
 		r.buildName,
 		br.GetName(),
 	)}
-	r.pw, err = reactor.NewPodWatcher(r.Cmd().Context(), kclientset, listOpts, params.Namespace())
-	if err != nil {
-		return err
-	}
-
-	r.pw.WithOnPodModifiedFn(r.onEvent)
-	// cannot defer with unlock up top because r.pw.Start() blocks; but the erroring out above kills the
-	// cli invocation, so it does not matter
-	r.watchLock.Unlock()
-	_, err = r.pw.Start()
+	_, err = r.pw.Start(listOpts)
 	return err
 }
 
+func (r *RunCommand) Log(msg string) {
+	// concurrent fmt.Fprintf(r.ioStream.Out...) calls need locking to avoid data races, as we 'write' to the stream
+	r.logLock.Lock()
+	defer r.logLock.Unlock()
+	fmt.Fprintf(r.ioStreams.Out, msg)
+}
+
 // runCmd instantiate the "build run" sub-command using common BuildRun flags.
 func runCmd() runner.SubCommand {
 	cmd := &cobra.Command{

@@ -214,7 +266,7 @@ func runCmd() runner.SubCommand {
 		cmd:             cmd,
 		buildRunSpec:    flags.BuildRunSpecFromFlags(cmd.Flags()),
 		tailLogsStarted: make(map[string]bool),
-		watchLock:       sync.Mutex{},
+		logLock:         sync.Mutex{},
 	}
 	cmd.Flags().BoolVarP(&runCommand.follow, "follow", "F", runCommand.follow, "Start a build and watch its log until it completes or fails.")
 	return runCommand

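The data-race portion of the fix comes down to two moves visible in run.go above: the PodWatcher and its onEvent/onTimeout/onNoPodEventsYet callbacks are now wired up in Complete, before Run ever starts the watch, and the coarse watchLock that used to serialize whole event handlers is replaced by a logLock that only guards writes to the shared output stream, so no mutex is held across the blocking r.pw.Start call. A minimal, self-contained sketch of that logging pattern (stdlib only; the type and field names here are illustrative, not the CLI's):

package main

import (
    "fmt"
    "io"
    "os"
    "sync"
)

// safeLogger serializes writes to a shared stream; without the mutex, concurrent
// callers (e.g. pod-event callbacks racing the main goroutine) would trip the race
// detector on the underlying writer, which is what the removed watchLock papered over.
type safeLogger struct {
    mu  sync.Mutex
    out io.Writer
}

func (l *safeLogger) Log(msg string) {
    l.mu.Lock()
    defer l.mu.Unlock()
    fmt.Fprint(l.out, msg)
}

func main() {
    l := &safeLogger{out: os.Stdout}

    var wg sync.WaitGroup
    for i := 0; i < 3; i++ {
        wg.Add(1)
        go func(n int) {
            defer wg.Done()
            l.Log(fmt.Sprintf("event %d handled\n", n))
        }(i)
    }
    wg.Wait()
}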
pkg/shp/cmd/build/run_test.go (+48, -35)

@@ -1,21 +1,21 @@
 package build
 
 import (
-	"runtime"
+	"bytes"
 	"strings"
 	"sync"
 	"testing"
-	"time"
 
 	buildv1alpha1 "github.com/shipwright-io/build/pkg/apis/build/v1alpha1"
 	shpfake "github.com/shipwright-io/build/pkg/client/clientset/versioned/fake"
 	"github.com/shipwright-io/cli/pkg/shp/flags"
 	"github.com/shipwright-io/cli/pkg/shp/params"
+	"github.com/shipwright-io/cli/pkg/shp/reactor"
 	"github.com/spf13/cobra"
+
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	kruntime "k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/apimachinery/pkg/util/wait"
 	"k8s.io/cli-runtime/pkg/genericclioptions"
 	"k8s.io/client-go/kubernetes/fake"
 	fakekubetesting "k8s.io/client-go/testing"

@@ -26,6 +26,8 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 		name       string
 		phase      corev1.PodPhase
 		logText    string
+		to         string
+		noPodYet   bool
 		cancelled  bool
 		brDeleted  bool
 		podDeleted bool

@@ -76,6 +78,16 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 			// k8s folks to "be careful" with it; fortunately, what we do for tail and pod_watcher so far is within
 			// the realm of reliable.
 		},
+		{
+			name:    "timeout",
+			to:      "1s",
+			logText: reactor.RequestTimeoutMessage,
+		},
+		{
+			name:     "no pod yet",
+			noPodYet: true,
+			logText:  "has not observed any pod events yet",
+		},
 	}
 
 	for _, test := range tests {

@@ -103,6 +115,7 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 			},
 		}
 		shpclientset := shpfake.NewSimpleClientset()
+
 		// need this reactor since the Run method uses the ObjectMeta.GenerateName k8s feature to generate the random
 		// name for the BuildRun. However, for our purposes with unit testing, we want to control the name of the BuildRun
 		// to facilitate the list/selector via labels that is also employed by the Run method.

@@ -116,7 +129,10 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 			return true, br, nil
 		}
 		shpclientset.PrependReactor("get", "buildruns", getReactorFunc)
-		kclientset := fake.NewSimpleClientset(pod)
+		kclientset := fake.NewSimpleClientset()
+		if !test.noPodYet {
+			kclientset = fake.NewSimpleClientset(pod)
+		}
 		ccmd := &cobra.Command{}
 		cmd := &RunCommand{
 			cmd: ccmd,

@@ -125,12 +141,16 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 			follow:          true,
 			shpClientset:    shpclientset,
 			tailLogsStarted: make(map[string]bool),
-			watchLock:       sync.Mutex{},
+			logLock:         sync.Mutex{},
 		}
 
 		// set up context
 		cmd.Cmd().ExecuteC()
-		param := params.NewParamsForTest(kclientset, shpclientset, nil, metav1.NamespaceDefault)
+		pm := genericclioptions.NewConfigFlags(true)
+		if len(test.to) > 0 {
+			pm.Timeout = &test.to
+		}
+		param := params.NewParamsForTest(kclientset, shpclientset, pm, metav1.NamespaceDefault)
 
 		ioStreams, _, out, _ := genericclioptions.NewTestIOStreams()
 

@@ -143,7 +163,13 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 			pod.DeletionTimestamp = &metav1.Time{}
 		}
 
-		cmd.Complete(param, []string{name})
+		cmd.Complete(param, &ioStreams, []string{name})
+		if len(test.to) > 0 {
+			cmd.Run(param, &ioStreams)
+			checkLog(test.name, test.logText, cmd, out, t)
+			continue
+		}
+
 		go func() {
 			err := cmd.Run(param, &ioStreams)
 			if err != nil {

@@ -152,35 +178,22 @@ func TestStartBuildRunFollowLog(t *testing.T) {
 
 		}()
 
-		// yield the processor, so the initialization in Run can occur; afterward, the watchLock should allow
-		// coordination between Run and onEvent
-		runtime.Gosched()
-
-		// even with our release of the context above with Gosched(), repeated runs in CI have surfaced occasional timing issues between
-		// cmd.Run() finishing initialization and cmd.onEvent trying to used struct variables, resulting in panics; so we employ the lock here
-		// to insure the required initializations have run; this is still better than a generic "sleep log enough for
-		// the init to occur.
-		cmd.watchLock.Lock()
-		err := wait.PollImmediate(1*time.Second, 3*time.Second, func() (done bool, err error) {
-			// check any of the vars on RunCommand that are used in onEvent and make sure they are set;
-			// we are verifying the initialization done in Run() on RunCommand is complete
-			if cmd.pw != nil && cmd.ioStreams != nil && cmd.shpClientset != nil {
-				cmd.watchLock.Unlock()
-				return true, nil
-			}
-			return false, nil
-		})
-		if err != nil {
-			cmd.watchLock.Unlock()
-			t.Errorf("Run initialization did not complete in time")
-		}
-
-		// mimic watch events, bypassing k8s fake client watch hoopla whose plug points are not always useful;
-		pod.Status.Phase = test.phase
-		cmd.onEvent(pod)
-		if !strings.Contains(out.String(), test.logText) {
-			t.Errorf("test %s: unexpected output: %s", test.name, out.String())
+		if !test.noPodYet {
+			// mimic watch events, bypassing k8s fake client watch hoopla whose plug points are not always useful;
+			pod.Status.Phase = test.phase
+			cmd.onEvent(pod)
+		} else {
+			cmd.onNoPodEventsYet()
 		}
+		checkLog(test.name, test.logText, cmd, out, t)
+	}
+}
 
+func checkLog(name, text string, cmd *RunCommand, out *bytes.Buffer, t *testing.T) {
+	// need to employ log lock since accessing same iostream out used by Run cmd
+	cmd.logLock.Lock()
+	defer cmd.logLock.Unlock()
+	if !strings.Contains(out.String(), text) {
+		t.Errorf("test %s: unexpected output: %s", name, out.String())
 	}
 }

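The new "timeout" test case pushes a literal "1s" through genericclioptions.ConfigFlags.Timeout and then expects reactor.RequestTimeoutMessage in the command output, exercising the onTimeout path added in run.go. The params.RequestTimeout() call in Complete presumably parses that flag string into the time.Duration handed to reactor.NewPodWatcher; a sketch of that kind of conversion under that assumption (the helper name and behavior here are illustrative, the real one lives in the params package):

package main

import (
    "fmt"
    "time"

    "k8s.io/cli-runtime/pkg/genericclioptions"
)

// requestTimeout mimics what a params.RequestTimeout-style helper could do:
// read the --request-timeout string carried by ConfigFlags and parse it as a duration.
func requestTimeout(flags *genericclioptions.ConfigFlags) (time.Duration, error) {
    if flags.Timeout == nil || *flags.Timeout == "" || *flags.Timeout == "0" {
        return 0, nil // zero is treated as "no client-side timeout"
    }
    return time.ParseDuration(*flags.Timeout)
}

func main() {
    to := "1s"
    flags := genericclioptions.NewConfigFlags(true)
    flags.Timeout = &to

    d, err := requestTimeout(flags)
    if err != nil {
        panic(err)
    }
    fmt.Println("request timeout:", d) // prints: request timeout: 1s
}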
pkg/shp/cmd/buildrun/cancel.go (+1, -1)

@@ -38,7 +38,7 @@ func (c *CancelCommand) Cmd() *cobra.Command {
 }
 
 // Complete fills in data provided by user
-func (c *CancelCommand) Complete(params *params.Params, args []string) error {
+func (c *CancelCommand) Complete(params *params.Params, io *genericclioptions.IOStreams, args []string) error {
 	c.name = args[0]
 
 	return nil
