1- // Copyright 2023 Google LLC
1+ // Copyright 2025 Google LLC
22//
33// Licensed under the Apache License, Version 2.0 (the "License");
44// you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@ package cmd
1717
1818import (
1919 "bufio"
20+ "context"
2021 "fmt"
2122 "hpc-toolkit/pkg/config"
2223 "hpc-toolkit/pkg/logging"
@@ -27,13 +28,22 @@ import (
2728 "strings"
2829
2930 "github.com/spf13/cobra"
31+ "github.com/zclconf/go-cty/cty"
32+ "golang.org/x/oauth2/google"
33+ compute "google.golang.org/api/compute/v1"
34+ "google.golang.org/api/option"
35+ )
36+
// robustDestroy is bound to the --robust flag of the destroy command. When
// set, destroy is retried on failure and firewall rules are cleaned up before
// groups that contain a network module are destroyed.
var robustDestroy bool
3140
3241func init () {
3342 rootCmd .AddCommand (
3443 addGroupSelectionFlags (
3544 addAutoApproveFlag (
3645 addArtifactsDirFlag (destroyCmd ))))
46+ destroyCmd .Flags ().BoolVar (& robustDestroy , "robust" , false , "Perform a robust destroy, including firewall rule cleanup." )
3747}
3848
3949var (
4858 }
4959)
5060
61+ var (
62+ destroyGroupsFunc = destroyGroups
63+ cleanupFirewallRulesFunc = cleanupFirewallRules
64+ destroyTerraformGroupFunc = destroyTerraformGroup
65+ )
66+
5167func runDestroyCmd (cmd * cobra.Command , args []string ) {
5268 deplRoot := args [0 ]
5369 artifactsDir := getArtifactsDir (deplRoot )
@@ -60,14 +76,66 @@ func runDestroyCmd(cmd *cobra.Command, args []string) {
6076 checkErr (validateGroupSelectionFlags (bp ), ctx )
6177 checkErr (shell .ValidateDeploymentDirectory (bp .Groups , deplRoot ), ctx )
6278
79+ destroyRunner (deplRoot , artifactsDir , bp , ctx )
80+ }
81+
82+ func destroyRunner (deplRoot string , artifactsDir string , bp config.Blueprint , ctx * config.YamlCtx ) {
83+ maxRetries := 1
84+ if robustDestroy {
85+ maxRetries = 3
86+ }
87+
88+ for attempt := 1 ; attempt <= maxRetries ; attempt ++ {
89+ logging .Info ("Destroy attempt %d of %d" , attempt , maxRetries )
90+
91+ destroyFailed , packerManifests := destroyGroupsFunc (deplRoot , artifactsDir , bp , ctx )
92+
93+ if ! destroyFailed {
94+ logging .Info ("Successfully destroyed all selected groups." )
95+ modulewriter .WritePackerDestroyInstructions (os .Stdout , packerManifests )
96+ return // Exit runDestroyCmd successfully
97+ }
98+
99+ if attempt == maxRetries {
100+ logging .Fatal ("Destruction of %q failed after %d attempts" , deplRoot , maxRetries )
101+ }
102+ logging .Info ("Retrying destroy..." )
103+ }
104+ }
105+
106+ func groupHasNetworkModule (group config.Group ) bool {
107+ for _ , module := range group .Modules {
108+ if strings .HasPrefix (module .Source , "modules/network/" ) || strings .HasPrefix (module .Source , "community/modules/network/" ) {
109+ return true
110+ }
111+ }
112+ return false
113+ }
114+
115+ func destroyGroups (deplRoot string , artifactsDir string , bp config.Blueprint , ctx * config.YamlCtx ) (bool , []string ) {
63116 // destroy in reverse order of creation!
64117 packerManifests := []string {}
118+ destroyFailed := false
65119 for i := len (bp .Groups ) - 1 ; i >= 0 ; i -- {
66120 group := bp .Groups [i ]
67121 if ! isGroupSelected (group .Name ) {
68122 logging .Info ("skipping group %q" , group .Name )
69123 continue
70124 }
125+
126+ if robustDestroy && groupHasNetworkModule (group ) {
127+ projectID , deploymentName , err := getProjectAndDeploymentVars (bp .Vars )
128+ if err != nil {
129+ logging .Error ("Skipping firewall cleanup: could not get required variables. %v" , err )
130+ destroyFailed = true
131+ break
132+ } else if err := cleanupFirewallRulesFunc (projectID , deploymentName ); err != nil {
133+ logging .Error ("Failed to cleanup firewall rules for group %s: %v" , group .Name , err )
134+ destroyFailed = true
135+ break
136+ }
137+ }
138+
71139 groupDir := filepath .Join (deplRoot , string (group .Name ))
72140
73141 if err := shell .ImportInputs (groupDir , artifactsDir , bp ); err != nil {
@@ -83,21 +151,47 @@ func runDestroyCmd(cmd *cobra.Command, args []string) {
83151 moduleDir := filepath .Join (groupDir , string (group .Modules [0 ].ID ))
84152 packerManifests = append (packerManifests , filepath .Join (moduleDir , "packer-manifest.json" ))
85153 case config .TerraformKind :
86- err = destroyTerraformGroup (groupDir )
154+ err = destroyTerraformGroupFunc (groupDir )
87155 default :
88156 err = fmt .Errorf ("group %q is an unsupported kind %q" , groupDir , group .Kind ().String ())
89157 }
90158
91159 if err != nil {
92160 logging .Error ("failed to destroy group %q:\n %s" , group .Name , renderError (err , * ctx ))
161+ destroyFailed = true
93162 if i == 0 || ! destroyChoice (bp .Groups [i - 1 ].Name ) {
94- logging . Fatal ( "destruction of %q failed" , deplRoot )
163+ break // Stop processing groups for this attempt
95164 }
96165 }
166+ }
167+ return destroyFailed , packerManifests
168+ }
97169
170+ func getStringVar (vars config.Dict , key string ) (string , error ) {
171+ val := vars .Get (key )
172+ if val .IsNull () {
173+ return "" , fmt .Errorf ("%s not found or is null in blueprint vars" , key )
174+ }
175+ if val .Type () != cty .String {
176+ return "" , fmt .Errorf ("%s is not a string, got type %s" , key , val .Type ().FriendlyName ())
98177 }
178+ strVal := val .AsString ()
179+ if strVal == "" {
180+ return "" , fmt .Errorf ("%s is empty in blueprint vars" , key )
181+ }
182+ return strVal , nil
183+ }
99184
100- modulewriter .WritePackerDestroyInstructions (os .Stdout , packerManifests )
185+ func getProjectAndDeploymentVars (vars config.Dict ) (string , string , error ) {
186+ projectID , err := getStringVar (vars , "project_id" )
187+ if err != nil {
188+ return "" , "" , err
189+ }
190+ deploymentName , err := getStringVar (vars , "deployment_name" )
191+ if err != nil {
192+ return "" , "" , err
193+ }
194+ return projectID , deploymentName , nil
101195}
102196
103197func destroyTerraformGroup (groupDir string ) error {
@@ -111,6 +205,134 @@ func destroyTerraformGroup(groupDir string) error {
111205 return shell .Destroy (tf , getApplyBehavior (), shell .TextOutput )
112206}
113207
208+ func confirmAction (prompt string ) bool {
209+ reader := bufio .NewReader (os .Stdin )
210+ for {
211+ fmt .Print (prompt )
212+ in , err := reader .ReadString ('\n' )
213+ if err != nil {
214+ logging .Error ("failed to read user input: %v" , err )
215+ return false // Default to no on error
216+ }
217+ switch strings .ToLower (strings .TrimSpace (in )) {
218+ case "y" :
219+ return true
220+ case "n" :
221+ return false
222+ default :
223+ fmt .Println ("Please enter 'y' or 'n'." )
224+ continue
225+ }
226+ }
227+ }
228+
229+ func cleanupFirewallRules (projectID string , deploymentName string ) error {
230+ logging .Info ("Cleaning up firewall rules for project %s, deployment %s" , projectID , deploymentName )
231+
232+ ctx := context .Background ()
233+ creds , err := google .FindDefaultCredentials (ctx , compute .ComputeScope )
234+ if err != nil {
235+ return fmt .Errorf ("failed to find default credentials: %v" , err )
236+ }
237+
238+ computeService , err := compute .NewService (ctx , option .WithCredentials (creds ))
239+ if err != nil {
240+ return fmt .Errorf ("failed to create compute service: %v" , err )
241+ }
242+
243+ // NOTE: This is a partial solution. This implementation only
244+ // uses a regular expression 'contains' filter on the deployment name to find networks (e.g. name eq ".*deployment_name.*" ).
245+ // This will fail to find networks that have a custom name that does not
246+ // contain the deployment name.
247+ // TODO: Implement a more robust solution that parses the Terraform plan
248+ // to get the exact network names.
249+ filter := fmt .Sprintf ("name eq \" .*%s.*\" " , deploymentName )
250+ logging .Info ("Using wildcard network filter: %s" , filter )
251+ networks , err := computeService .Networks .List (projectID ).Filter (filter ).Do ()
252+ if err != nil {
253+ return fmt .Errorf ("failed to list networks with wildcard filter: %v" , err )
254+ }
255+
256+ if len (networks .Items ) == 0 {
257+ logging .Info ("No matching networks found for project %s." , projectID )
258+ return nil
259+ }
260+
261+ firewallsToDelete , err := listAssociatedFirewallRules (projectID , computeService , networks .Items )
262+ if err != nil {
263+ return err
264+ }
265+
266+ if len (firewallsToDelete ) == 0 {
267+ logging .Info ("No firewall rules found to delete for the identified networks." )
268+ return nil
269+ }
270+
271+ return confirmAndDeleteFirewallRules (projectID , deploymentName , & computeServiceWrapper {computeService }, firewallsToDelete )
272+ }
273+
274+ type computeServiceWrapper struct {
275+ * compute.Service
276+ }
277+
278+ func (w * computeServiceWrapper ) FirewallsDelete (projectID string , firewall string ) (* compute.Operation , error ) {
279+ return w .Firewalls .Delete (projectID , firewall ).Do ()
280+ }
281+
282+ // listAssociatedFirewallRules lists firewall rules associated with a given set of networks.
283+ func listAssociatedFirewallRules (projectID string , computeService * compute.Service , networks []* compute.Network ) ([]* compute.Firewall , error ) {
284+ var firewallsToDelete []* compute.Firewall
285+ for _ , network := range networks {
286+ fwList , err := computeService .Firewalls .List (projectID ).Filter (fmt .Sprintf ("network=\" %s\" " , network .SelfLink )).Do ()
287+ if err != nil {
288+ return nil , fmt .Errorf ("failed to list firewall rules for network %s: %v" , network .Name , err )
289+ }
290+ firewallsToDelete = append (firewallsToDelete , fwList .Items ... )
291+ }
292+ return firewallsToDelete , nil
293+ }
294+
295+ // confirmAndDeleteFirewallRules confirms with the user and then deletes the specified firewall rules.
296+ func confirmAndDeleteFirewallRules (projectID string , deploymentName string , computeService firewallDeleter , firewallsToDelete []* compute.Firewall ) error {
297+ var firewallNames []string
298+ for _ , fw := range firewallsToDelete {
299+ firewallNames = append (firewallNames , fw .Name )
300+ }
301+ logging .Info ("Found firewall rules to delete: %v" , firewallNames )
302+
303+ if ! flagAutoApprove {
304+ prompt := fmt .Sprintf ("Do you want to delete these %d firewall rules associated with deployment %s? [y/n]: " , len (firewallNames ), deploymentName )
305+ if ! confirmAction (prompt ) {
306+ logging .Info ("Skipping firewall rule deletion." )
307+ return nil
308+ }
309+ }
310+
311+ logging .Info ("Successfully submitted deletion requests for firewall rules." )
312+ // Delete firewall rules
313+ var deletionErrors []string
314+ for _ , fwName := range firewallNames {
315+ logging .Info ("Deleting firewall rule %s..." , fwName )
316+ _ , err := computeService .FirewallsDelete (projectID , fwName )
317+ if err != nil {
318+ // Log non-critical errors and continue trying to delete other rules
319+ msg := fmt .Sprintf ("Failed to delete firewall rule %s: %v" , fwName , err )
320+ logging .Error ("error deleting firewall rule: %s" , msg )
321+ deletionErrors = append (deletionErrors , msg )
322+ }
323+ }
324+
325+ if len (deletionErrors ) > 0 {
326+ return fmt .Errorf ("encountered errors while deleting firewall rules:\n %s" , strings .Join (deletionErrors , "\n " ))
327+ }
328+
329+ return nil
330+ }
331+
332+ type firewallDeleter interface {
333+ FirewallsDelete (projectID string , firewall string ) (* compute.Operation , error )
334+ }
335+
114336func destroyChoice (nextGroup config.GroupName ) bool {
115337 switch getApplyBehavior () {
116338 case shell .AutomaticApply :
0 commit comments