-
Notifications
You must be signed in to change notification settings - Fork 2
powervs: fix ports cleaning by cleaning them manually on delete #43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| """ | ||
| Helper script for cleaning up orphaned network interfaces (ports) from | ||
| PowerVS subnets. | ||
| """ | ||
|
|
||
| import logging | ||
|
|
||
| from resalloc_ibm_cloud.argparsers import powervs_cleanup_ips_parser | ||
| from resalloc_ibm_cloud.helpers import setup_logging | ||
| from resalloc_ibm_cloud.powervs.credentials import get_powervs_credentials | ||
| from resalloc_ibm_cloud.powervs.client import PowerVSClient | ||
|
|
||
|
|
||
| logger = logging.getLogger(__name__) | ||
|
|
||
|
|
||
| def _get_active_instance_ids(client: PowerVSClient) -> set[str]: | ||
| instances = client.list_instances() | ||
| return {inst["pvmInstanceID"] for inst in instances if inst.get("pvmInstanceID")} | ||
|
|
||
|
|
||
| def cleanup_network_interfaces( | ||
| client: PowerVSClient, | ||
| network_ids: list[str], | ||
| ) -> None: | ||
| """ | ||
| Delete orphaned network interfaces (ports) that are not attached to any | ||
| active VM instance. | ||
| """ | ||
| active_ids = _get_active_instance_ids(client) | ||
|
|
||
| logger.info("Found %d active instance(s)", len(active_ids)) | ||
|
|
||
| for network_id in network_ids: | ||
| interfaces = client.list_network_interfaces(network_id) | ||
| logger.info( | ||
| "Network %s: found %d network interface(s)", | ||
| network_id, len(interfaces), | ||
| ) | ||
|
|
||
| orphaned = 0 | ||
| for iface in interfaces: | ||
| instance_ref = iface.get("instance") | ||
| instance_id = instance_ref.get("instanceID") if instance_ref else None | ||
| if instance_id and instance_id in active_ids: | ||
| continue | ||
|
|
||
| orphaned += 1 | ||
| logger.info( | ||
| "Deleting orphaned network interface %s (ip=%s) from network %s", | ||
| iface["id"], iface.get("ipAddress", "?"), network_id, | ||
| ) | ||
|
|
||
| client.delete_network_interface(network_id, iface["id"]) | ||
|
|
||
| logger.info( | ||
| "Network %s: deleted %d orphaned interface(s)", network_id, orphaned, | ||
| ) | ||
|
|
||
|
|
||
| def main() -> None: | ||
| """Entrypoint to the script.""" | ||
| opts = powervs_cleanup_ips_parser().parse_args() | ||
| setup_logging(opts.log_level) | ||
|
|
||
| credentials = get_powervs_credentials(opts.token_file, opts.crn) | ||
| client = PowerVSClient(credentials) | ||
| cleanup_network_interfaces(client, opts.network_id) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,7 +10,7 @@ | |
| from typing import Any, Optional | ||
|
|
||
| import backoff | ||
| from requests import HTTPError | ||
| from requests import HTTPError, RequestException | ||
| import requests | ||
|
|
||
| from resalloc_ibm_cloud.argparsers import powervs_arg_parser | ||
|
|
@@ -162,6 +162,39 @@ def _delete_volume_with_backoff(self, volume_id: str) -> None: | |
| self.client.delete_volume(volume_id) | ||
| logger.info("Deleted volume with ID %s", volume_id) | ||
|
|
||
| def _wait_for_instance_gone( | ||
| self, instance_id: str, timeout: int = 600, interval: int = 10, | ||
| ) -> None: | ||
| """ | ||
| Poll until the instance no longer exists (404) so that its network | ||
| interfaces are safe to delete. | ||
| """ | ||
| start_time = time.time() | ||
| while True: | ||
| if time.time() - start_time > timeout: | ||
| logger.warning( | ||
| "Timed out waiting for instance %s to be fully deleted", | ||
| instance_id, | ||
| ) | ||
| break | ||
|
|
||
| try: | ||
| instance = self.client.get_instance(instance_id) | ||
| status = instance.get("status", "unknown") | ||
| logger.info( | ||
| "Waiting for instance %s to be deleted (status: %s)", | ||
| instance_id, status, | ||
| ) | ||
| except RequestException as e: | ||
| if e.response.status_code == 404: | ||
| logger.info("Instance %s is gone", instance_id) | ||
| return | ||
|
|
||
| logger.error("Failed to get instance %s: %s", instance_id, str(e)) | ||
| break | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you really want to break, or continue?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. on continue we may get stuck. 5xx errors are retried. Waiting for the VM to delete itself and then check and delete ports was gemini's idea, which makes sense... but in my experience with powervs this is not needed, so I'd rather break on error here (the API client in this tool I've written itself retries on error requests several times and after several retries it gives up, see |
||
|
|
||
| sleep(interval) | ||
|
|
||
| def _force_delete_volume_by_instance_name(self, instance_name: str) -> None: | ||
| # if powervs decides to fail and keep the volume around, force delete any | ||
| # volume that starts with the instance name | ||
|
|
@@ -198,6 +231,18 @@ def delete_vm(self, name: str) -> None: | |
| instance_information = self.client.get_instance(instance_id) | ||
| volume_ids = instance_information.get("volumeIDs", []) | ||
|
|
||
| # IBM Cloud does not automatically clean up network interfaces (ports) | ||
| # when a VM is deleted, which eventually exhausts the subnet IP pool. | ||
| # We need to remember them now and delete them after the instance is gone. | ||
| # This is a new thing in PowerVS, maybe in the future it will be handled automatically | ||
| # again??? | ||
| network_interfaces = [] | ||
| for net in instance_information.get("networks", []): | ||
| network_id = net.get("networkID") | ||
| interface_id = net.get("networkInterfaceID") | ||
| if network_id and interface_id: | ||
| network_interfaces.append((network_id, interface_id)) | ||
|
|
||
| # the data volumes tends to remain undeleted even if the delete_instance | ||
| # call is with delete_data_volumes, so this needs to be assured manually | ||
| for volume_id in volume_ids: | ||
|
|
@@ -218,6 +263,19 @@ def delete_vm(self, name: str) -> None: | |
| instance_id, | ||
| ) | ||
|
|
||
| if not network_interfaces: | ||
| return | ||
|
|
||
| self._wait_for_instance_gone(instance_id) | ||
| for network_id, interface_id in network_interfaces: | ||
| try: | ||
| self.client.delete_network_interface(network_id, interface_id) | ||
| except RequestException as e: | ||
| logger.error( | ||
| "Failed to delete network interface %s: %s", | ||
| interface_id, str(e), | ||
| ) | ||
|
nikromen marked this conversation as resolved.
|
||
|
|
||
| def _parse_volumes(self, volumes_list: list[str]) -> list[dict]: | ||
| """ | ||
| Parse volume specifications from a list of strings. | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.