@@ -60,6 +60,8 @@ type ApplicationGroupReconciler struct {
6060
6161 // Recorder generates kubernetes events
6262 Recorder record.EventRecorder
63+
64+ lastSuccessfulApplicationGroup * orkestrav1alpha1.ApplicationGroup
6365}
6466
6567// +kubebuilder:rbac:groups=orkestra.azure.microsoft.com,resources=applicationgroups,verbs=get;list;watch;create;update;patch;delete
@@ -83,16 +85,17 @@ func (r *ApplicationGroupReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
8385 }
8486
8587 if appGroup .GetAnnotations () != nil {
86- // TODO (nitishm) Use this in error remediation by reapplying last successful appgroup spec
87- // lastSuccessfulApplicationGroup := appGroup.Annotations[lastSuccessfulApplicationGroupKey]
88- _ = appGroup .Annotations [lastSuccessfulApplicationGroupKey ]
88+ last := & orkestrav1alpha1.ApplicationGroup {}
89+ s := appGroup .Annotations [lastSuccessfulApplicationGroupKey ]
90+ _ = json .Unmarshal ([]byte (s ), last )
91+ r .lastSuccessfulApplicationGroup = last
8992 }
9093
91- // handle DELETE if deletion timestamp is non-zero
94+ // handle deletes if deletion timestamp is non-zero
9295 if ! appGroup .DeletionTimestamp .IsZero () {
9396 // If finalizer is found, remove it and requeue
9497 if appGroup .Finalizers != nil {
95- logr .Info ("Cleaning up" )
98+ logr .Info ("cleaning up the applicationgroup resource " )
9699 // TODO: Take remediation action
97100 // Reverse the entire workflow to remove all the Helm Releases
98101 appGroup .Finalizers = nil
@@ -113,34 +116,34 @@ func (r *ApplicationGroupReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
113116 return ctrl.Result {Requeue : true }, nil
114117 }
115118
116- // handle UPDATE if checksum mismatched
119+ // handle first time install and subsequent updates
117120 checksums , err := pkg .Checksum (& appGroup )
118121 if err != nil {
119122 // TODO (nitishm) Handle different error types here to decide remediation action
120123 if errors .Is (err , pkg .ErrChecksumAppGroupSpecMismatch ) {
121124 if appGroup .Status .Checksums != nil {
122125 appGroup .Status .Update = true
123126 }
124- appGroup .Status .Checksums = checksums
125127 requeue , err = r .reconcile (ctx , logr , r .WorkflowNS , & appGroup )
126128 if err != nil {
127129 logr .Error (err , "failed to reconcile ApplicationGroup instance" )
128- r .updateStatusAndEvent (ctx , appGroup , requeue , err )
130+ r .handleResponseAndEvent (ctx , appGroup , requeue , err )
129131 return ctrl.Result {Requeue : requeue }, err
130132 }
131133
134+ appGroup .Status .Checksums = checksums
135+
132136 if appGroup .Status .Phase != v1alpha12 .NodeSucceeded {
133- r .updateStatusAndEvent (ctx , appGroup , requeue , err )
137+ r .handleResponseAndEvent (ctx , appGroup , requeue , err )
134138 return ctrl.Result {Requeue : true , RequeueAfter : requeueAfter }, nil
135139 }
136140
137- r .updateStatusAndEvent (ctx , appGroup , requeue , err )
138- return ctrl.Result {Requeue : false }, nil
141+ r .handleResponseAndEvent (ctx , appGroup , requeue , err )
142+ return ctrl.Result {Requeue : requeue }, err
139143 }
140144
141- appGroup .Status .Error = err .Error ()
142- _ = r .Status ().Update (ctx , & appGroup )
143145 logr .Error (err , "failed to calculate checksum annotations for application group specs" )
146+ r .handleResponseAndEvent (ctx , appGroup , false , err )
144147 return ctrl.Result {Requeue : false }, err
145148 }
146149
@@ -155,8 +158,7 @@ func (r *ApplicationGroupReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
155158 err = r .List (ctx , & wfs , listOption )
156159 if err != nil {
157160 logr .Error (err , "failed to find generate workflow instance" )
158- appGroup .Status .Error = err .Error ()
159- _ = r .Status ().Update (ctx , & appGroup )
161+ r .handleResponseAndEvent (ctx , appGroup , false , err )
160162 return ctrl.Result {Requeue : false }, err
161163 }
162164
@@ -169,18 +171,16 @@ func (r *ApplicationGroupReconciler) Reconcile(req ctrl.Request) (ctrl.Result, e
169171 switch appGroup .Status .Phase {
170172 case v1alpha12 .NodeRunning , v1alpha12 .NodePending :
171173 logr .V (1 ).Info ("workflow in pending/running state. requeue and reconcile after a short period" )
172- _ = r . Status (). Update ( ctx , & appGroup )
174+ r . handleResponseAndEvent ( ctx , appGroup , true , nil )
173175 return ctrl.Result {Requeue : true , RequeueAfter : requeueAfter }, nil
174176 case v1alpha12 .NodeSucceeded :
175177 logr .V (1 ).Info ("workflow ran to completion and succeeded" )
176- appGroup .Status .Error = ""
177- r .updateStatusAndEvent (ctx , appGroup , false , nil )
178+ r .handleResponseAndEvent (ctx , appGroup , false , nil )
178179 return ctrl.Result {Requeue : false }, nil
179180 case v1alpha12 .NodeError , v1alpha12 .NodeFailed :
180181 err = fmt .Errorf ("workflow in failure/error condition" )
181182 logr .Error (err , "workflow in failure/error condition" )
182- appGroup .Status .Error = err .Error ()
183- _ = r .Status ().Update (ctx , & appGroup )
183+ r .handleResponseAndEvent (ctx , appGroup , false , err )
184184 return ctrl.Result {Requeue : false }, err
185185 }
186186
@@ -195,9 +195,11 @@ func (r *ApplicationGroupReconciler) SetupWithManager(mgr ctrl.Manager) error {
195195 Complete (r )
196196}
197197
198- func (r * ApplicationGroupReconciler ) updateStatusAndEvent (ctx context.Context , grp orkestrav1alpha1.ApplicationGroup , requeue bool , err error ) {
198+ func (r * ApplicationGroupReconciler ) handleResponseAndEvent (ctx context.Context , grp orkestrav1alpha1.ApplicationGroup , requeue bool , err error ) {
199199 errStr := ""
200200 if err != nil {
201+ // Handle the error by remediating the workflow
202+ r .handleRemediation (ctx , err )
201203 errStr = err .Error ()
202204 }
203205
@@ -206,9 +208,11 @@ func (r *ApplicationGroupReconciler) updateStatusAndEvent(ctx context.Context, g
206208 _ = r .Status ().Update (ctx , & grp )
207209
208210 if grp .Status .Phase == v1alpha12 .NodeSucceeded {
211+ // Annotate the resource with the last successful ApplicationGroup spec
209212 b , _ := json .Marshal (& grp )
210213 grp .SetAnnotations (map [string ]string {lastSuccessfulApplicationGroupKey : string (b )})
211214 _ = r .Update (ctx , & grp )
215+
212216 r .Recorder .Event (& grp , "Normal" , "ReconcileSuccess" , fmt .Sprintf ("Successfully reconciled ApplicationGroup %s" , grp .Name ))
213217 }
214218
@@ -264,3 +268,10 @@ func initApplications(appGroup *orkestrav1alpha1.ApplicationGroup) {
264268 }
265269 appGroup .Spec .Applications = v .DeepCopy ().Spec .Applications
266270}
271+
272+ func (r * ApplicationGroupReconciler ) handleRemediation (ctx context.Context , err error ) {
273+ if r .lastSuccessfulApplicationGroup != nil {
274+ r .lastSuccessfulApplicationGroup .Status .Checksums = nil
275+ _ = r .Update (ctx , r .lastSuccessfulApplicationGroup )
276+ }
277+ }
0 commit comments