Skip to content

Commit b2a8f53

Browse files
fix daily workflow ci runner
1 parent 2933d44 commit b2a8f53

File tree

5 files changed

+227
-28
lines changed

5 files changed

+227
-28
lines changed

.github/workflows/k3s_run.yaml

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ on:
77
- 'main'
88
workflow_dispatch:
99
schedule:
10-
- cron: '0 1 * * *' # Runs daily at 1:00 AM UTC (6:30 AM IST)
10+
- cron: '0 4 * * *' # Runs daily at 4:00 AM UTC (9:30 AM IST)
1111

1212
jobs:
1313
setup_rancher:
@@ -55,7 +55,7 @@ jobs:
5555
- name: Install K3S Version ${{ matrix.k3s_version }} & Rancher Version ${{ matrix.rancher_version }}
5656
id: install_k3s_and_rancher
5757
run: |
58-
sleep 120
58+
sleep 60
5959
set -e
6060
go test -timeout 30m -run ^TestE2E$ github.com/rancher/observability-e2e/installations/k3s -v -count=1 -ginkgo.v
6161
@@ -68,9 +68,10 @@ jobs:
6868
- name: Run Observability Charts Tests
6969
id: run_observability_tests
7070
run: |
71-
sleep 60
71+
sleep 180
7272
set -e
7373
TEST_LABEL_FILTER=installation go test -timeout 20m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-installation-${{ matrix.k3s_version }}.txt
74+
sleep 120
7475
TEST_LABEL_FILTER=E2E go test -timeout 30m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-e2e-${{ matrix.k3s_version }}.txt
7576
7677
- name: Run Installation Charts Tests For Backup and Restore
@@ -87,8 +88,11 @@ jobs:
8788
sleep 60
8889
set -e
8990
TEST_LABEL_FILTER=beforeUpgrade go test -timeout 20m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-upgrade-${{ matrix.k3s_version }}.txt
91+
sleep 120
9092
TEST_LABEL_FILTER=E2E go test -timeout 30m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-e2e-${{ matrix.k3s_version }}.txt
93+
sleep 120
9194
TEST_LABEL_FILTER=afterUpgrade go test -timeout 20m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-upgrade-${{ matrix.k3s_version }}.txt
95+
sleep 120
9296
TEST_LABEL_FILTER=E2E go test -timeout 30m github.com/rancher/observability-e2e/tests/e2e -v -count=1 -ginkgo.v | tee ~/artifacts/test-output-e2e-${{ matrix.k3s_version }}.txt
9397
9498
- name: Run Tests and Prepare Artifact Name

tests/helper/charts/prometheusfederator.go

Lines changed: 30 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ package charts
33
import (
44
"context"
55
"encoding/json"
6+
"errors"
67
"fmt"
78
"strings"
89

@@ -40,6 +41,17 @@ func InstallPrometheusFederatorChart(client *rancher.Client, installOptions *Ins
4041
"podSecurity": map[string]interface{}{
4142
"enabled": prometheusFederatorOpts.EnablePodSecurity,
4243
},
44+
// ADDED: Resource limits for federator
45+
"resources": map[string]interface{}{
46+
"requests": map[string]interface{}{
47+
"cpu": "50m",
48+
"memory": "100Mi",
49+
},
50+
"limits": map[string]interface{}{
51+
"cpu": "100m",
52+
"memory": "250Mi",
53+
},
54+
},
4355
}
4456

4557
// Create chart install configurations for the CRD and the main chart.
@@ -71,9 +83,9 @@ func InstallPrometheusFederatorChart(client *rancher.Client, installOptions *Ins
7183
}
7284

7385
// Start watching the App resource.
74-
timeoutSeconds := int64(1 * 60) // 5 minutes
86+
timeoutSeconds := int64(10 * 60) // 10 minutes
7587
watchInterface, err := catalogClient.Apps(PrometheusFederatorNamespace).Watch(context.TODO(), metav1.ListOptions{
76-
FieldSelector: "metadata.name=" + RancherMonitoringName,
88+
FieldSelector: "metadata.name=" + PrometheusFederatorName, // FIX: Was watching RancherMonitoringName
7789
TimeoutSeconds: &timeoutSeconds,
7890
})
7991
if err != nil {
@@ -109,7 +121,7 @@ func InstallPrometheusFederatorChart(client *rancher.Client, installOptions *Ins
109121
// Handle the result.
110122
if err != nil {
111123
if err.Error() == wait.TimeoutError {
112-
return fmt.Errorf("timeout: prometheus-federator chart was not installed within 5 minutes")
124+
return fmt.Errorf("timeout: prometheus-federator chart was not installed within 10 minutes")
113125
}
114126
return err
115127
}
@@ -140,6 +152,17 @@ func UpgradePrometheusFederatorChart(client *rancher.Client, installOptions *Ins
140152
"podSecurity": map[string]interface{}{
141153
"enabled": prometheusFederatorOpts.EnablePodSecurity,
142154
},
155+
// ADDED: Resource limits for federator
156+
"resources": map[string]interface{}{
157+
"requests": map[string]interface{}{
158+
"cpu": "50m",
159+
"memory": "100Mi",
160+
},
161+
"limits": map[string]interface{}{
162+
"cpu": "100m",
163+
"memory": "250Mi",
164+
},
165+
},
143166
}
144167

145168
// Process monitoring options with provider-specific prefixes
@@ -190,9 +213,9 @@ func UpgradePrometheusFederatorChart(client *rancher.Client, installOptions *Ins
190213
}
191214

192215
// Start watching the App resource.
193-
timeoutSeconds := int64(1 * 60) // 5 minutes
216+
timeoutSeconds := int64(10 * 60) // 10 minutes
194217
watchInterface, err := catalogClient.Apps(PrometheusFederatorNamespace).Watch(context.TODO(), metav1.ListOptions{
195-
FieldSelector: "metadata.name=" + RancherMonitoringName,
218+
FieldSelector: "metadata.name=" + PrometheusFederatorName, // FIX: Was watching RancherMonitoringName
196219
TimeoutSeconds: &timeoutSeconds,
197220
})
198221
if err != nil {
@@ -227,8 +250,8 @@ func UpgradePrometheusFederatorChart(client *rancher.Client, installOptions *Ins
227250

228251
// Handle the result.
229252
if err != nil {
230-
if err.Error() == wait.TimeoutError {
231-
return fmt.Errorf("timeout: prometheus-federator chart was not installed within 5 minutes")
253+
if err.Error() == wait.TimeoutError || errors.Is(err, context.DeadlineExceeded) {
254+
return fmt.Errorf("timeout: prometheus-federator chart was not installed within 10 minutes")
232255
}
233256
return err
234257
}

tests/helper/charts/rancheralerting.go

Lines changed: 24 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,31 @@ func InstallRancherAlertingChart(client *rancher.Client, installOptions *Install
3838
alertingValues := map[string]interface{}{
3939
"prom2teams": map[string]interface{}{
4040
"enabled": rancherAlertingOpts.Teams,
41+
// ADDED: Resource limits for prom2teams
42+
"resources": map[string]interface{}{
43+
"requests": map[string]interface{}{
44+
"cpu": "50m",
45+
"memory": "50Mi",
46+
},
47+
"limits": map[string]interface{}{
48+
"cpu": "100m",
49+
"memory": "100Mi",
50+
},
51+
},
4152
},
4253
"sachet": map[string]interface{}{
4354
"enabled": rancherAlertingOpts.SMS,
55+
// ADDED: Resource limits for sachet
56+
"resources": map[string]interface{}{
57+
"requests": map[string]interface{}{
58+
"cpu": "50m",
59+
"memory": "50Mi",
60+
},
61+
"limits": map[string]interface{}{
62+
"cpu": "100m",
63+
"memory": "100Mi",
64+
},
65+
},
4466
},
4567
}
4668

@@ -73,7 +95,7 @@ func InstallRancherAlertingChart(client *rancher.Client, installOptions *Install
7395
}
7496

7597
// Start watching the App resource.
76-
timeoutSeconds := int64(5 * 60) // 5 minutes
98+
timeoutSeconds := int64(10 * 60) // 10 minutes
7799
watchInterface, err := catalogClient.Apps(RancherAlertingNamespace).Watch(context.TODO(), metav1.ListOptions{
78100
FieldSelector: "metadata.name=" + RancherAlertingName,
79101
TimeoutSeconds: &timeoutSeconds,
@@ -111,7 +133,7 @@ func InstallRancherAlertingChart(client *rancher.Client, installOptions *Install
111133
// Handle the result.
112134
if err != nil {
113135
if err.Error() == wait.TimeoutError {
114-
return fmt.Errorf("timeout: rancher-alerting-drivers chart was not installed within 5 minutes")
136+
return fmt.Errorf("timeout: rancher-alerting-drivers chart was not installed within 10 minutes")
115137
}
116138
return err
117139
}

tests/helper/charts/rancherlogging.go

Lines changed: 81 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,50 @@ func InstallRancherLoggingChart(client *rancher.Client, installOptions *InstallO
3535
return err
3636
}
3737

38-
// Prepare logging values.
38+
// Prepare logging values with resource limits.
3939
loggingValues := map[string]interface{}{
4040
string(installOptions.Cluster.Provider): map[string]interface{}{
4141
"additionalLoggingSources": map[string]interface{}{
4242
"enabled": rancherLoggingOpts.AdditionalLoggingSources,
4343
},
4444
},
45+
// ADDED: Resource limits for logging components
46+
"fluentbit": map[string]interface{}{
47+
"resources": map[string]interface{}{
48+
"requests": map[string]interface{}{
49+
"cpu": "50m",
50+
"memory": "100Mi",
51+
},
52+
"limits": map[string]interface{}{
53+
"cpu": "100m",
54+
"memory": "250Mi",
55+
},
56+
},
57+
},
58+
"fluentd": map[string]interface{}{
59+
"resources": map[string]interface{}{
60+
"requests": map[string]interface{}{
61+
"cpu": "100m",
62+
"memory": "250Mi",
63+
},
64+
"limits": map[string]interface{}{
65+
"cpu": "200m",
66+
"memory": "500Mi",
67+
},
68+
},
69+
},
70+
"logging-operator": map[string]interface{}{
71+
"resources": map[string]interface{}{
72+
"requests": map[string]interface{}{
73+
"cpu": "50m",
74+
"memory": "100Mi",
75+
},
76+
"limits": map[string]interface{}{
77+
"cpu": "100m",
78+
"memory": "250Mi",
79+
},
80+
},
81+
},
4582
}
4683

4784
// Create chart install configurations.
@@ -84,7 +121,7 @@ func InstallRancherLoggingChart(client *rancher.Client, installOptions *InstallO
84121
}
85122

86123
// Start watching the App resource.
87-
timeoutSeconds := int64(5 * 60) // 5 minutes
124+
timeoutSeconds := int64(15 * 60) // 15 minutes
88125
watchInterface, err := catalogClient.Apps(RancherLoggingNamespace).Watch(context.TODO(), metav1.ListOptions{
89126
FieldSelector: "metadata.name=" + RancherLoggingName,
90127
TimeoutSeconds: &timeoutSeconds,
@@ -122,7 +159,7 @@ func InstallRancherLoggingChart(client *rancher.Client, installOptions *InstallO
122159
// Handle the result.
123160
if err != nil {
124161
if err.Error() == wait.TimeoutError {
125-
return fmt.Errorf("timeout: rancher-logging chart was not installed within 5 minutes")
162+
return fmt.Errorf("timeout: rancher-logging chart was not installed within 15 minutes")
126163
}
127164
return err
128165
}
@@ -148,13 +185,50 @@ func UpgradeRancherLoggingChart(client *rancher.Client, installOptions *InstallO
148185
return err
149186
}
150187

151-
// Prepare logging values.
188+
// Prepare logging values with resource limits.
152189
loggingValues := map[string]interface{}{
153190
string(installOptions.Cluster.Provider): map[string]interface{}{
154191
"additionalLoggingSources": map[string]interface{}{
155192
"enabled": rancherLoggingOpts.AdditionalLoggingSources,
156193
},
157194
},
195+
// ADDED: Resource limits for logging components
196+
"fluentbit": map[string]interface{}{
197+
"resources": map[string]interface{}{
198+
"requests": map[string]interface{}{
199+
"cpu": "50m",
200+
"memory": "100Mi",
201+
},
202+
"limits": map[string]interface{}{
203+
"cpu": "100m",
204+
"memory": "250Mi",
205+
},
206+
},
207+
},
208+
"fluentd": map[string]interface{}{
209+
"resources": map[string]interface{}{
210+
"requests": map[string]interface{}{
211+
"cpu": "100m",
212+
"memory": "250Mi",
213+
},
214+
"limits": map[string]interface{}{
215+
"cpu": "200m",
216+
"memory": "500Mi",
217+
},
218+
},
219+
},
220+
"logging-operator": map[string]interface{}{
221+
"resources": map[string]interface{}{
222+
"requests": map[string]interface{}{
223+
"cpu": "50m",
224+
"memory": "100Mi",
225+
},
226+
"limits": map[string]interface{}{
227+
"cpu": "100m",
228+
"memory": "250Mi",
229+
},
230+
},
231+
},
158232
}
159233

160234
// Process logging options with provider-specific prefixes
@@ -214,7 +288,7 @@ func UpgradeRancherLoggingChart(client *rancher.Client, installOptions *InstallO
214288
}
215289

216290
// Setup watch with timeout
217-
timeoutSeconds := int64(5 * 60) // 5 minute timeout
291+
timeoutSeconds := int64(15 * 60) // 15 minute timeout
218292
adminClient, err := rancher.NewClient(client.RancherConfig.AdminToken, client.Session)
219293
if err != nil {
220294
return err
@@ -260,8 +334,8 @@ func UpgradeRancherLoggingChart(client *rancher.Client, installOptions *InstallO
260334
})
261335

262336
if err != nil {
263-
if errors.Is(err, context.DeadlineExceeded) {
264-
return fmt.Errorf("timeout waiting for %s state (5 minutes)", phase.phaseName)
337+
if errors.Is(err, context.DeadlineExceeded) || err.Error() == wait.TimeoutError {
338+
return fmt.Errorf("timeout waiting for %s state (15 minutes)", phase.phaseName)
265339
}
266340
return err
267341
}

0 commit comments

Comments
 (0)