1010from pathlib import Path
1111from typing import Optional , Tuple
1212
13- from az_shared .execute_cmd import execute
14- from common .shell import Cmd
15-
1613from .agentless_api import deactivate_scan_options
1714from .config import (
1815 CONFIG_BASE_DIR ,
1916 Config ,
2017 DEFAULT_RESOURCE_GROUP ,
2118 compute_install_id ,
2219 get_config_dir ,
20+ parse_credentials ,
2321)
2422from .errors import ConfigurationError , SetupError
2523from .metadata import (
24+ MetadataReadResult ,
2625 MetadataReadStatus ,
2726 delete_metadata ,
2827 read_metadata ,
2928 rg_mismatch_detail ,
3029 terraform_state_exists ,
3130)
32- from .secrets import API_KEY_SECRET_NAME , get_key_vault_name
31+ from .secrets import (
32+ API_KEY_SECRET_NAME ,
33+ get_key_vault_name ,
34+ key_vault_exists ,
35+ purge_key_vault ,
36+ soft_deleted_key_vault_exists ,
37+ )
38+ from .reporter import PrintReporter
3339from .state_storage import (
40+ ensure_current_user_blob_data_access ,
3441 find_agentless_resource_groups ,
3542 get_storage_account_name ,
43+ resource_group_exists ,
3644 storage_account_exists ,
3745)
3846from .terraform import generate_ssh_key , generate_terraform_config , generate_tfvars
@@ -154,47 +162,23 @@ def _resolve_destroy_resource_group(
154162 return env_rg or DEFAULT_RESOURCE_GROUP
155163
156164
157- def get_credentials_from_env () -> tuple :
158- """Get Datadog credentials from environment variables.
159-
160- Returns:
161- Tuple of (api_key, app_key, site)
162-
163- Raises:
164- SetupError: If any required credentials are missing.
165- """
166- api_key = os .environ .get ("DD_API_KEY" , "" ).strip ()
167- app_key = os .environ .get ("DD_APP_KEY" , "" ).strip ()
168- site = os .environ .get ("DD_SITE" , "" ).strip ()
169-
170- errors = []
171- if not api_key :
172- errors .append ("DD_API_KEY is required" )
173- if not app_key :
174- errors .append ("DD_APP_KEY is required" )
175- if not site :
176- errors .append ("DD_SITE is required" )
177-
178- if errors :
179- raise SetupError (
180- "Missing credentials" ,
181- "\n " .join (f" - { e } " for e in errors ),
182- )
183-
184- return api_key , app_key , site
185-
186-
187165def regenerate_terraform_config (
188166 scanner_subscription : str ,
189167 storage_account : str ,
190168 resource_group : str ,
191169 config_folder : Path ,
170+ metadata_result : Optional [MetadataReadResult ] = None ,
192171) -> Path :
193172 """Regenerate Terraform configuration when local folder doesn't exist.
194173
195- Reads deployment metadata from Azure Blob Storage to discover all
196- locations and subscriptions. Falls back to requiring environment
197- variables if metadata is not available.
174+ When ``metadata_result`` is provided, reuses the caller's metadata
175+ read instead of issuing a second ``az storage blob show`` round-trip:
176+ ``cmd_destroy`` already reads the blob to recover the scan
177+ subscription list before deciding to call this helper, so a fresh
178+ read here would only duplicate work and risk inconsistency on
179+ transient failures. Falls back to environment variables if metadata
180+ is not available (the typical "user wiped local config and we have
181+ no blob to read" path).
198182
199183 Generates a throwaway SSH key pair with :func:`terraform.generate_ssh_key` (same as
200184 deploy): Azure only needs a decodable public key in ``terraform.tfvars`` for refresh/destroy;
@@ -206,9 +190,9 @@ def regenerate_terraform_config(
206190 """
207191 print ("Local config not found, but Terraform state exists in storage account." )
208192
209- api_key , app_key , site = get_credentials_from_env ()
193+ api_key , app_key , site = parse_credentials ()
210194
211- result = read_metadata (storage_account )
195+ result = metadata_result if metadata_result is not None else read_metadata (storage_account )
212196 metadata = result .metadata if result .status == MetadataReadStatus .PRESENT else None
213197 if metadata :
214198 print (
@@ -279,9 +263,14 @@ def get_working_directory(
279263 scanner_subscription : str ,
280264 storage_account : str ,
281265 resource_group : str ,
266+ metadata_result : Optional [MetadataReadResult ] = None ,
282267) -> Tuple [Path , Optional [Path ]]:
283268 """Get the working directory for Terraform, regenerating config if needed.
284269
270+ ``metadata_result`` is forwarded to :func:`regenerate_terraform_config`
271+ so the caller can pass a previously-read ``MetadataReadResult`` and
272+ avoid re-reading the deployment metadata blob on the regen path.
273+
285274 Returns:
286275 ``(work_dir, ssh_key_temp_dir)``. The second item is a directory to remove
287276 after destroy (only set when config was regenerated and contains a
@@ -317,7 +306,11 @@ def get_working_directory(
317306 sys .exit (1 )
318307
319308 ssh_tmp = regenerate_terraform_config (
320- scanner_subscription , storage_account , resource_group , config_folder
309+ scanner_subscription ,
310+ storage_account ,
311+ resource_group ,
312+ config_folder ,
313+ metadata_result ,
321314 )
322315 return config_folder , ssh_tmp
323316
@@ -362,64 +355,88 @@ def run_terraform_destroy(work_dir: Path) -> None:
362355 os .chdir (original_dir )
363356
364357
365- def delete_key_vault (vault_name : str ) -> bool :
366- """Delete the Key Vault.
367-
368- Returns:
369- True if deleted, False if the az CLI reported failure.
358+ def cleanup_key_vault (
359+ install_id : str , resource_group : str , subscription : str
360+ ) -> None :
361+ """Purge the Key Vault left behind by Terraform.
362+
363+ Terraform does not manage the vault itself (the API-key secret is
364+ written by the wizard before any plan runs), so destroy never
365+ cleans it up automatically. Without a purge, Azure keeps the
366+ soft-deleted vault reserved for its retention window (7 days for
367+ wizard-created vaults) - the recurring root cause of
368+ ``VaultAlreadyExists`` on re-deploy.
369+
370+ Always purges: ``terraform destroy`` has already confirmed the
371+ user's intent by the time we get here. Triggers purge when either
372+ a live vault or a soft-deleted descriptor exists; skips only when
373+ Azure has neither. The soft-deleted check is essential because a
374+ previous destroy may have run ``az keyvault delete`` and then
375+ failed at the purge step - ``key_vault_exists`` then returns False
376+ on retry and would otherwise leave the name reserved for the full
377+ retention window.
370378 """
371- try :
372- execute (
373- Cmd (["az" , "keyvault" , "delete" ])
374- .param ("--name" , vault_name )
375- .flag ("--no-wait" ),
376- )
377- return True
378- except Exception :
379- return False
379+ vault_name = get_key_vault_name (install_id )
380380
381+ live = key_vault_exists (vault_name , resource_group , subscription )
382+ soft_deleted = (
383+ not live and soft_deleted_key_vault_exists (vault_name , subscription )
384+ )
381385
382- def prompt_key_vault_cleanup (install_id : str ) -> None :
383- """Ask user if they want to delete the Key Vault."""
384- vault_name = get_key_vault_name (install_id )
386+ if not live and not soft_deleted :
387+ print ("Key Vault already removed:" )
388+ print (f" { vault_name } " )
389+ print ()
390+ return
385391
386- print ( "=" * 60 )
387- print (" Cleanup Options " )
388- print ( "=" * 60 )
389- print ()
390- print ( "The Key Vault was NOT deleted by Terraform:" )
391- print (f" { vault_name } " )
392- print ()
393- print ( "This vault holds the Datadog API key and may be reused for future deployments ." )
392+ if soft_deleted :
393+ print (f"Purging soft-deleted Key Vault { vault_name } ... " )
394+ else :
395+ print (f"Purging Key Vault { vault_name } ..." )
396+ if purge_key_vault ( vault_name , subscription ):
397+ print (f"✅ Key Vault purged: { vault_name } " )
398+ else :
399+ print ( f"⚠️ Failed to purge Key Vault { vault_name } ." )
394400 print ()
395401
396- try :
397- response = input ("Do you want to delete the Key Vault? (y/N): " ).strip ().lower ()
398- if response in ("y" , "yes" ):
399- print ("Deleting Key Vault..." )
400- if delete_key_vault (vault_name ):
401- print ("✅ Key Vault deleted." )
402- print (" Note: Azure retains soft-deleted vaults for the configured" )
403- print (" retention period. It will be auto-purged after that." )
404- else :
405- print ("⚠️ Failed to delete Key Vault (it may not exist or you lack permissions)." )
406- else :
407- print ("Key Vault kept." )
408- except EOFError :
409- print ("Key Vault kept (non-interactive mode)." )
410402
403+ def print_final_notes (
404+ storage_account : str , resource_group : str , scanner_subscription : str
405+ ) -> None :
406+ """Print final notes about resources that Terraform did not delete.
411407
412- def print_final_notes (storage_account : str , resource_group : str ) -> None :
413- """Print final notes about resources not deleted."""
408+ The state Storage Account, Key Vault, and Resource Group are created
409+ by the Python wizard rather than Terraform, so ``terraform destroy``
410+ leaves them in place by design. When the user has already removed
411+ them out-of-band (typically via ``az group delete``), telling them to
412+ run it again would only surface a ``ResourceGroupNotFound`` error,
413+ so we adjust the notes to match the actual state.
414+ """
414415 print ()
416+ rg_present = resource_group_exists (resource_group , scanner_subscription )
417+ sa_present = rg_present and storage_account_exists (
418+ storage_account , resource_group , scanner_subscription
419+ )
420+
421+ if not rg_present :
422+ print ("=" * 60 )
423+ print (" Cleanup Complete" )
424+ print ("=" * 60 )
425+ print ()
426+ print (f"Resource group already removed: { resource_group } " )
427+ print ("No further manual cleanup needed." )
428+ print ()
429+ return
430+
415431 print ("=" * 60 )
416432 print (" Notes" )
417433 print ("=" * 60 )
418434 print ()
419435 print ("The following resources were NOT deleted:" )
420436 print ()
421437 print (f" Resource Group: { resource_group } " )
422- print (f" Storage Account: { storage_account } (contains Terraform state)" )
438+ if sa_present :
439+ print (f" Storage Account: { storage_account } (contains Terraform state)" )
423440 print ()
424441 print ("You can delete the resource group manually if no longer needed:" )
425442 print (f" az group delete --name { resource_group } --yes --no-wait" )
@@ -443,7 +460,7 @@ def cmd_destroy() -> None:
443460 # Fail fast if Datadog credentials aren't set — they're required
444461 # below for scan options cleanup and for regenerate_terraform_config.
445462 # The returned values are re-read by the callers that actually use them.
446- get_credentials_from_env ()
463+ parse_credentials ()
447464
448465 scanner_subscription = get_scanner_subscription ()
449466
@@ -463,15 +480,48 @@ def cmd_destroy() -> None:
463480 install_id = compute_install_id (scanner_subscription , resource_group )
464481 storage_account = get_storage_account (install_id )
465482
466- # Metadata is no longer authoritative for the resource group, but
467- # we still read it to recover the list of scan subscriptions for
468- # the Datadog-API cleanup at the end.
469- result = read_metadata (storage_account )
470- metadata = result .metadata if result .status == MetadataReadStatus .PRESENT else None
471- subscriptions_to_scan = metadata .subscriptions_to_scan if metadata else []
483+ # Storage Blob Data Contributor is a *data-plane* role; Owner on
484+ # the resource group does not include it. Grant it to the current
485+ # user before the metadata read so a user destroying a deployment
486+ # created by someone else doesn't trip over an opaque
487+ # "permissions" error from `az storage blob show`. When the SA
488+ # does not exist (RG already nuked out-of-band, or a partial
489+ # deploy never created it) the metadata read is also skipped:
490+ # ``az storage blob show`` would DNS-fail on the missing account
491+ # endpoint and produce noisy stderr that ``_classify_blob_show_failure``
492+ # cannot recognise as a clean MISSING.
493+ sa_present = storage_account_exists (
494+ storage_account , resource_group , scanner_subscription
495+ )
496+
497+ metadata_result : Optional [MetadataReadResult ] = None
498+ subscriptions_to_scan : list [str ] = []
499+ if sa_present :
500+ ensure_current_user_blob_data_access (
501+ storage_account ,
502+ resource_group ,
503+ scanner_subscription ,
504+ PrintReporter (),
505+ )
506+
507+ # Metadata is no longer authoritative for the resource group, but
508+ # we still read it to recover the list of scan subscriptions for
509+ # the Datadog-API cleanup at the end. The result is also handed
510+ # down to ``get_working_directory`` so the regen path doesn't
511+ # round-trip the same blob again.
512+ metadata_result = read_metadata (storage_account )
513+ metadata = (
514+ metadata_result .metadata
515+ if metadata_result .status == MetadataReadStatus .PRESENT
516+ else None
517+ )
518+ subscriptions_to_scan = metadata .subscriptions_to_scan if metadata else []
472519
473520 work_dir , destroy_ssh_key_dir = get_working_directory (
474- scanner_subscription , storage_account , resource_group
521+ scanner_subscription ,
522+ storage_account ,
523+ resource_group ,
524+ metadata_result ,
475525 )
476526
477527 try :
@@ -498,8 +548,8 @@ def cmd_destroy() -> None:
498548 else :
499549 print ("⚠️ Keeping deployment metadata so you can re-run `destroy` to retry." )
500550
501- prompt_key_vault_cleanup (install_id )
502- print_final_notes (storage_account , resource_group )
551+ cleanup_key_vault (install_id , resource_group , scanner_subscription )
552+ print_final_notes (storage_account , resource_group , scanner_subscription )
503553
504554 except SetupError as e :
505555 print ()
0 commit comments