Skip to content

Commit ad9aded

Browse files
committed
Fix e2e startup test
Do more cleanup between runs, collect more logs on failure.

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
1 parent: e4cfad2 | commit: ad9aded

10 files changed

Lines changed: 150 additions & 45 deletions

File tree

.github/workflows/e2e.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -96,8 +96,8 @@ jobs:
9696
uses: actions/upload-artifact@v4
9797
if: ${{ failure() }}
9898
with:
99-
name: ${{ matrix.etest}}-journald-logs
100-
path: tests/e2e/${{ matrix.etest }}/*-jlog.txt
99+
name: e2e-${{ matrix.etest}}-logs
100+
path: tests/e2e/${{ matrix.etest }}/*log.txt
101101
retention-days: 30
102102
- name: On Failure, Launch Debug Session
103103
uses: lhotari/action-upterm@v1

tests/e2e/btrfs/btrfs_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,7 @@ var _ = AfterEach(func() {
7575
var _ = AfterSuite(func() {
7676
if failed {
7777
Expect(e2e.SaveJournalLogs(tc.Servers)).To(Succeed())
78+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
7879
}
7980
if !failed || *ci {
8081
Expect(e2e.DestroyCluster()).To(Succeed())

tests/e2e/embeddedmirror/embeddedmirror_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,7 @@ var _ = AfterEach(func() {
129129
var _ = AfterSuite(func() {
130130
if failed {
131131
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
132+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
132133
} else {
133134
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
134135
}

tests/e2e/externalip/externalip_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -150,6 +150,7 @@ var _ = AfterEach(func() {
150150
var _ = AfterSuite(func() {
151151
if failed {
152152
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
153+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
153154
} else {
154155
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
155156
}

tests/e2e/privateregistry/privateregistry_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -134,6 +134,7 @@ var _ = AfterEach(func() {
134134
var _ = AfterSuite(func() {
135135
if failed {
136136
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
137+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
137138
} else {
138139
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
139140
}

tests/e2e/s3/s3_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,7 @@ var _ = AfterEach(func() {
158158
var _ = AfterSuite(func() {
159159
if failed {
160160
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
161+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
161162
} else {
162163
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
163164
}

tests/e2e/startup/Vagrantfile

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ def provision(vm, role, role_num, node_num)
3636
k3s.env = %W[K3S_KUBECONFIG_MODE=0644 #{install_type} INSTALL_K3S_SKIP_START=true]
3737
k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321
3838
k3s.config = <<~YAML
39+
debug: true
3940
token: vagrant
4041
node-external-ip: #{NETWORK_PREFIX}.100
4142
flannel-iface: eth1
@@ -48,6 +49,7 @@ def provision(vm, role, role_num, node_num)
4849
k3s.config_mode = '0644' # side-step https://github.com/k3s-io/k3s/issues/4321
4950
k3s.config = <<~YAML
5051
server: "https://#{NETWORK_PREFIX}.100:6443"
52+
debug: true
5153
token: vagrant
5254
node-external-ip: #{node_ip}
5355
flannel-iface: eth1

tests/e2e/startup/startup_test.go

Lines changed: 48 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -42,11 +42,11 @@ func StartK3sCluster(nodes []e2e.VagrantNode, serverYAML string, agentYAML strin
4242
var resetCmd string
4343
var startCmd string
4444
if strings.Contains(node.String(), "server") {
45-
resetCmd = "head -n 3 /etc/rancher/k3s/config.yaml > /tmp/config.yaml && sudo mv /tmp/config.yaml /etc/rancher/k3s/config.yaml"
45+
resetCmd = "head -n 4 /etc/rancher/k3s/config.yaml > /tmp/config.yaml && sudo mv /tmp/config.yaml /etc/rancher/k3s/config.yaml"
4646
yamlCmd = fmt.Sprintf("echo '%s' >> /etc/rancher/k3s/config.yaml", serverYAML)
4747
startCmd = "systemctl start k3s"
4848
} else {
49-
resetCmd = "head -n 4 /etc/rancher/k3s/config.yaml > /tmp/config.yaml && sudo mv /tmp/config.yaml /etc/rancher/k3s/config.yaml"
49+
resetCmd = "head -n 5 /etc/rancher/k3s/config.yaml > /tmp/config.yaml && sudo mv /tmp/config.yaml /etc/rancher/k3s/config.yaml"
5050
yamlCmd = fmt.Sprintf("echo '%s' >> /etc/rancher/k3s/config.yaml", agentYAML)
5151
startCmd = "systemctl start k3s-agent"
5252
}
@@ -68,16 +68,17 @@ func KillK3sCluster(nodes []e2e.VagrantNode) error {
6868
if _, err := node.RunCmdOnNode("k3s-killall.sh"); err != nil {
6969
return err
7070
}
71-
if _, err := node.RunCmdOnNode("journalctl --flush --sync --rotate --vacuum-size=1"); err != nil {
71+
if _, err := node.RunCmdOnNode("sh -c 'docker ps -qa | xargs -r docker rm -fv'"); err != nil {
7272
return err
7373
}
74-
if _, err := node.RunCmdOnNode("rm -rf /etc/rancher/k3s/config.yaml.d"); err != nil {
74+
if _, err := node.RunCmdOnNode("rm -rf /etc/rancher/k3s/config.yaml.d /var/lib/kubelet/pods /var/lib/rancher/k3s/agent/etc /var/lib/rancher/k3s/agent/containerd /var/lib/rancher/k3s/server/db /var/log/pods /run/k3s /run/flannel"); err != nil {
7575
return err
7676
}
77-
if strings.Contains(node.String(), "server") {
78-
if _, err := node.RunCmdOnNode("rm -rf /var/lib/rancher/k3s/server/db"); err != nil {
79-
return err
80-
}
77+
if _, err := node.RunCmdOnNode("systemctl restart containerd docker"); err != nil {
78+
return err
79+
}
80+
if _, err := node.RunCmdOnNode("journalctl --flush --sync --rotate --vacuum-size=1"); err != nil {
81+
return err
8182
}
8283
}
8384
return nil
@@ -206,38 +207,6 @@ var _ = Describe("Various Startup Configurations", Ordered, func() {
206207
Expect(err).NotTo(HaveOccurred())
207208
})
208209
})
209-
Context("Verify CRI-Dockerd", func() {
210-
It("Starts K3s with no issues", func() {
211-
dockerYAML := "docker: true"
212-
err := StartK3sCluster(tc.AllNodes(), dockerYAML, dockerYAML)
213-
Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog(err))
214-
215-
By("CLUSTER CONFIG")
216-
By("OS:" + *nodeOS)
217-
By(tc.Status())
218-
tc.KubeconfigFile, err = e2e.GenKubeconfigFile(tc.Servers[0].String())
219-
Expect(err).NotTo(HaveOccurred())
220-
})
221-
222-
It("Checks node and pod status", func() {
223-
By("Fetching node status")
224-
Eventually(func() error {
225-
return tests.NodesReady(tc.KubeconfigFile, e2e.VagrantSlice(tc.AllNodes()))
226-
}, "360s", "5s").Should(Succeed())
227-
228-
Eventually(func() error {
229-
return tests.AllPodsUp(tc.KubeconfigFile)
230-
}, "360s", "5s").Should(Succeed())
231-
Eventually(func() error {
232-
return tests.CheckDefaultDeployments(tc.KubeconfigFile)
233-
}, "300s", "10s").Should(Succeed())
234-
e2e.DumpPods(tc.KubeconfigFile)
235-
})
236-
It("Kills the cluster", func() {
237-
err := KillK3sCluster(tc.AllNodes())
238-
Expect(err).NotTo(HaveOccurred())
239-
})
240-
})
241210
Context("Verify prefer-bundled-bin flag", func() {
242211
It("Starts K3s with no issues", func() {
243212
preferBundledYAML := "prefer-bundled-bin: true"
@@ -256,7 +225,6 @@ var _ = Describe("Various Startup Configurations", Ordered, func() {
256225
Eventually(func() error {
257226
return tests.NodesReady(tc.KubeconfigFile, e2e.VagrantSlice(tc.AllNodes()))
258227
}, "360s", "5s").Should(Succeed())
259-
260228
Eventually(func() error {
261229
return tests.AllPodsUp(tc.KubeconfigFile)
262230
}, "360s", "5s").Should(Succeed())
@@ -288,7 +256,9 @@ var _ = Describe("Various Startup Configurations", Ordered, func() {
288256
Eventually(func() error {
289257
return tests.NodesReady(tc.KubeconfigFile, e2e.VagrantSlice(tc.Agents))
290258
}, "360s", "5s").Should(Succeed())
291-
259+
Eventually(func() error {
260+
return tests.AllPodsUp(tc.KubeconfigFile)
261+
}, "360s", "5s").Should(Succeed())
292262
Eventually(func() error {
293263
return tests.CheckDefaultDeployments(tc.KubeconfigFile)
294264
}, "300s", "10s").Should(Succeed())
@@ -389,6 +359,37 @@ var _ = Describe("Various Startup Configurations", Ordered, func() {
389359
Expect(err).NotTo(HaveOccurred())
390360
})
391361
})
362+
Context("Verify CRI-Dockerd", func() {
363+
It("Starts K3s with no issues", func() {
364+
dockerYAML := "docker: true"
365+
err := StartK3sCluster(tc.AllNodes(), dockerYAML, dockerYAML)
366+
Expect(err).NotTo(HaveOccurred(), e2e.GetVagrantLog(err))
367+
368+
By("CLUSTER CONFIG")
369+
By("OS:" + *nodeOS)
370+
By(tc.Status())
371+
tc.KubeconfigFile, err = e2e.GenKubeconfigFile(tc.Servers[0].String())
372+
Expect(err).NotTo(HaveOccurred())
373+
})
374+
375+
It("Checks node and pod status", func() {
376+
By("Fetching node status")
377+
Eventually(func() error {
378+
return tests.NodesReady(tc.KubeconfigFile, e2e.VagrantSlice(tc.AllNodes()))
379+
}, "360s", "5s").Should(Succeed())
380+
Eventually(func() error {
381+
return tests.AllPodsUp(tc.KubeconfigFile)
382+
}, "360s", "5s").Should(Succeed())
383+
Eventually(func() error {
384+
return tests.CheckDefaultDeployments(tc.KubeconfigFile)
385+
}, "300s", "10s").Should(Succeed())
386+
e2e.DumpPods(tc.KubeconfigFile)
387+
})
388+
It("Kills the cluster", func() {
389+
err := KillK3sCluster(tc.AllNodes())
390+
Expect(err).NotTo(HaveOccurred())
391+
})
392+
})
392393
})
393394

394395
var failed bool
@@ -399,7 +400,12 @@ var _ = AfterEach(func() {
399400
var _ = AfterSuite(func() {
400401
if failed {
401402
AddReportEntry("config", e2e.GetConfig(tc.AllNodes()))
403+
AddReportEntry("pods", e2e.DescribePods(tc.KubeconfigFile))
402404
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
405+
Expect(e2e.SaveDocker(tc.AllNodes())).To(Succeed())
406+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
407+
Expect(e2e.SaveNetwork(tc.AllNodes())).To(Succeed())
408+
Expect(e2e.SaveKernel(tc.AllNodes())).To(Succeed())
403409
} else {
404410
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
405411
}

tests/e2e/testutils.go

Lines changed: 92 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -418,6 +418,11 @@ func GenReport(specReport ginkgo.SpecReport) {
418418
fmt.Printf("%s", status)
419419
}
420420

421+
func (v VagrantNode) TailPodLogs(lines int) (string, error) {
422+
cmd := fmt.Sprintf("sh -c 'tail -n %d /var/log/pods/*/*/*'", lines)
423+
return v.RunCmdOnNode(cmd)
424+
}
425+
421426
func (v VagrantNode) GetJournalLogs() (string, error) {
422427
cmd := "journalctl -u k3s* --no-pager"
423428
return v.RunCmdOnNode(cmd)
@@ -436,6 +441,83 @@ func TailJournalLogs(lines int, nodes []VagrantNode) string {
436441
return logs.String()
437442
}
438443

444+
func SaveDocker(nodes []VagrantNode) error {
445+
cmd := "sh -xc 'docker ps -a --no-trunc; docker info; journalctl -u containerd -u docker'"
446+
for _, node := range nodes {
447+
logs, err := node.RunCmdOnNode(cmd)
448+
if err != nil {
449+
logs = fmt.Sprintf("** failed to list docker containers and logs for node %s: %v **", node, err)
450+
}
451+
lf, err := os.Create(node.String() + "-dockerlog.txt")
452+
if err != nil {
453+
return err
454+
}
455+
defer lf.Close()
456+
if _, err := lf.Write([]byte(logs)); err != nil {
457+
return err
458+
}
459+
}
460+
return nil
461+
}
462+
463+
func SaveKernel(nodes []VagrantNode) error {
464+
cmd := "dmesg"
465+
for _, node := range nodes {
466+
logs, err := node.RunCmdOnNode(cmd)
467+
if err != nil {
468+
logs = fmt.Sprintf("** failed to read kernel message log for node %s: %v **", node, err)
469+
}
470+
lf, err := os.Create(node.String() + "-kernlog.txt")
471+
if err != nil {
472+
return err
473+
}
474+
defer lf.Close()
475+
if _, err := lf.Write([]byte(logs)); err != nil {
476+
return err
477+
}
478+
}
479+
return nil
480+
}
481+
482+
func SaveNetwork(nodes []VagrantNode) error {
483+
cmd := "sh -xc 'ip addr show; ip route show; ip neighbor show; iptables-save'"
484+
for _, node := range nodes {
485+
logs, err := node.RunCmdOnNode(cmd)
486+
if err != nil {
487+
logs = fmt.Sprintf("** failed to read network config for node %s: %v **", node, err)
488+
}
489+
lf, err := os.Create(node.String() + "-netlog.txt")
490+
if err != nil {
491+
return err
492+
}
493+
defer lf.Close()
494+
if _, err := lf.Write([]byte(logs)); err != nil {
495+
return err
496+
}
497+
}
498+
return nil
499+
}
500+
501+
// TailPodLogs saves the pod logs of each node to a <NAME>-podlog.txt file.
502+
// When used in GHA CI, the logs are uploaded as an artifact on failure.
503+
func TailPodLogs(lines int, nodes []VagrantNode) error {
504+
for _, node := range nodes {
505+
logs, err := node.TailPodLogs(lines)
506+
if err != nil {
507+
logs = fmt.Sprintf("** failed to read pod logs for node %s: %v **", node, err)
508+
}
509+
lf, err := os.Create(node.String() + "-podlog.txt")
510+
if err != nil {
511+
return err
512+
}
513+
defer lf.Close()
514+
if _, err := lf.Write([]byte(logs)); err != nil {
515+
return err
516+
}
517+
}
518+
return nil
519+
}
520+
439521
// SaveJournalLogs saves the journal logs of each node to a <NAME>-jlog.txt file.
440522
// When used in GHA CI, the logs are uploaded as an artifact on failure.
441523
func SaveJournalLogs(nodes []VagrantNode) error {
@@ -497,11 +579,20 @@ func DumpNodes(kubeConfig string) {
497579
}
498580

499581
func DumpPods(kubeConfig string) {
500-
cmd := "kubectl get pods -o wide --no-headers -A"
582+
cmd := "kubectl get pods -o wide --no-headers -A --kubeconfig=" + kubeConfig
501583
res, _ := RunCommand(cmd)
502584
fmt.Println(strings.TrimSpace(res))
503585
}
504586

587+
func DescribePods(kubeConfig string) string {
588+
cmd := "kubectl describe pod -A --kubeconfig=" + kubeConfig
589+
res, err := RunCommand(cmd)
590+
if err != nil {
591+
return fmt.Sprintf("Failed to describe pods: %v", err)
592+
}
593+
return res
594+
}
595+
505596
// RestartCluster restarts the k3s service on each node given
506597
func RestartCluster(nodes []VagrantNode) error {
507598
for _, node := range nodes {

tests/e2e/wasm/wasm_test.go

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ var _ = AfterEach(func() {
123123
var _ = AfterSuite(func() {
124124
if failed {
125125
Expect(e2e.SaveJournalLogs(tc.AllNodes())).To(Succeed())
126+
Expect(e2e.TailPodLogs(50, tc.AllNodes())).To(Succeed())
126127
} else {
127128
Expect(e2e.GetCoverageReport(tc.AllNodes())).To(Succeed())
128129
}

0 commit comments

Comments (0)