Skip to content

Commit 5e8c2be

Browse files
committed
surface root cause in sync failure message and cache discovery errors
Signed-off-by: Patroklos Papapetrou <ppapapetrou76@gmail.com>
1 parent b3eda0f commit 5e8c2be

2 files changed

Lines changed: 87 additions & 1 deletion

File tree

gitops-engine/pkg/sync/sync_context.go

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,7 +481,23 @@ func (sc *syncContext) Sync() {
481481
sc.log.WithValues("skipHooks", sc.skipHooks, "started", sc.started()).Info("Syncing")
482482
tasks, ok := sc.getSyncTasks()
483483
if !ok {
484-
sc.setOperationPhase(common.OperationFailed, "one or more synchronization tasks are not valid")
484+
// Collect distinct error messages from failed resource results so that the
485+
// operation phase message surfaces the actual root cause (e.g. cluster API
486+
// server unreachable) rather than only the generic "not valid" message.
487+
seen := make(map[string]bool)
488+
489+
var errMessages []string
490+
for _, res := range sc.syncRes {
491+
if res.Status == common.ResultCodeSyncFailed && res.Message != "" && !seen[res.Message] {
492+
seen[res.Message] = true
493+
errMessages = append(errMessages, res.Message)
494+
}
495+
}
496+
msg := "one or more synchronization tasks are not valid"
497+
if len(errMessages) > 0 {
498+
msg = fmt.Sprintf("%s: %s", msg, strings.Join(errMessages, "; "))
499+
}
500+
sc.setOperationPhase(common.OperationFailed, msg)
485501
return
486502
}
487503

@@ -1024,6 +1040,7 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) {
10241040
isRetryable := apierrors.IsUnauthorized
10251041

10261042
serverResCache := make(map[schema.GroupVersionKind]*metav1.APIResource)
1043+
serverResErrCache := make(map[schema.GroupVersionKind]error)
10271044

10281045
// check permissions
10291046
for _, task := range tasks {
@@ -1033,6 +1050,8 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) {
10331050
if val, ok := serverResCache[task.groupVersionKind()]; ok {
10341051
serverRes = val
10351052
err = nil
1053+
} else if cachedErr, ok := serverResErrCache[task.groupVersionKind()]; ok {
1054+
err = cachedErr
10361055
} else {
10371056
err = retry.OnError(retry.DefaultRetry, isRetryable, func() error {
10381057
serverRes, err = kubeutil.ServerResourceForGroupVersionKind(sc.disco, task.groupVersionKind(), "get")
@@ -1041,6 +1060,8 @@ func (sc *syncContext) getSyncTasks() (_ syncTasks, successful bool) {
10411060
})
10421061
if serverRes != nil {
10431062
serverResCache[task.groupVersionKind()] = serverRes
1063+
} else if err != nil {
1064+
serverResErrCache[task.groupVersionKind()] = err
10441065
}
10451066
}
10461067

gitops-engine/pkg/sync/sync_context_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -922,6 +922,71 @@ func TestServerResourcesRetry(t *testing.T) {
922922
}
923923
}
924924

925+
func TestSync_getSyncTasks_FailureMessage(t *testing.T) {
926+
syncCtx := newTestSyncCtx(nil)
927+
testSvc := testingutils.NewService()
928+
testSvc.SetName("test-service")
929+
testSvc.SetNamespace(testingutils.FakeArgoCDNamespace)
930+
931+
// Mock discovery failure for this resource
932+
fakeDisco := &fakedisco.FakeDiscovery{
933+
Fake: &testcore.Fake{
934+
Resources: []*metav1.APIResourceList{},
935+
},
936+
}
937+
fakeDisco.Fake.PrependReactor("get", "resource", func(action testcore.Action) (handled bool, ret runtime.Object, err error) {
938+
return true, nil, errors.New("discovery failed")
939+
})
940+
syncCtx.disco = fakeDisco
941+
942+
syncCtx.resources = groupResources(ReconciliationResult{
943+
Live: []*unstructured.Unstructured{testSvc},
944+
Target: []*unstructured.Unstructured{testSvc},
945+
})
946+
947+
syncCtx.Sync()
948+
phase, msg, _ := syncCtx.GetState()
949+
950+
assert.Equal(t, synccommon.OperationFailed, phase)
951+
assert.Contains(t, msg, "one or more synchronization tasks are not valid")
952+
assert.Contains(t, msg, "discovery failed")
953+
}
954+
955+
func Test_getSyncTasks_ErrorCaching(t *testing.T) {
956+
syncCtx := newTestSyncCtx(nil)
957+
958+
// Create two services with the same GVK
959+
svc1 := testingutils.NewService()
960+
svc1.SetName("svc1")
961+
svc2 := testingutils.NewService()
962+
svc2.SetName("svc2")
963+
964+
// Mock discovery failure
965+
discoveryCalls := 0
966+
fakeDisco := &fakedisco.FakeDiscovery{
967+
Fake: &testcore.Fake{
968+
Resources: []*metav1.APIResourceList{},
969+
},
970+
}
971+
fakeDisco.Fake.PrependReactor("get", "resource", func(action testcore.Action) (handled bool, ret runtime.Object, err error) {
972+
discoveryCalls++
973+
return true, nil, errors.New("persistent discovery error")
974+
})
975+
syncCtx.disco = fakeDisco
976+
977+
syncCtx.resources = groupResources(ReconciliationResult{
978+
Live: []*unstructured.Unstructured{svc1, svc2},
979+
Target: []*unstructured.Unstructured{svc1, svc2},
980+
})
981+
982+
tasks, ok := syncCtx.getSyncTasks()
983+
assert.False(t, ok)
984+
assert.NotNil(t, tasks)
985+
986+
// discoveryCalls should be 1 if caching works.
987+
assert.Equal(t, 1, discoveryCalls, "Discovery should have been called only once due to caching")
988+
}
989+
925990
func TestDoNotSyncOrPruneHooks(t *testing.T) {
926991
syncCtx := newTestSyncCtx(nil, WithOperationSettings(false, false, false, true))
927992
targetPod := testingutils.NewPod()

0 commit comments

Comments
 (0)