Skip to content

Commit 689956d

Browse files
(PTFE-3027) Fix volume creation and deletion for scaleway backend (#708)
### Purpose Currently volume are not well created. Here we had the opportunity to choose between Local or Block storage and fix the deletion of those volume
2 parents c53a485 + e0465a5 commit 689956d

3 files changed

Lines changed: 306 additions & 8 deletions

File tree

runner_manager/backend/scaleway.py

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false
1+
# pyright: reportOptionalMemberAccess=false, reportArgumentType=false, reportReturnType=false, reportMissingTypeStubs=false, reportAttributeAccessIssue=false
22
import logging
33
import os
44
import re
@@ -7,17 +7,16 @@
77

88
from pydantic import Field
99
from redis_om import NotFoundError
10-
from scaleway import Client # type: ignore[import-untyped]
11-
from scaleway.instance.v1 import ( # type: ignore[import-untyped]
10+
from scaleway import Client
11+
from scaleway.instance.v1 import (
1212
Image,
1313
Server,
1414
ServerAction,
1515
ServerState,
16+
VolumeServerTemplate,
1617
)
17-
from scaleway.instance.v1.custom_api import (
18-
InstanceUtilsV1API, # type: ignore[import-untyped]
19-
)
20-
from scaleway.marketplace.v2 import MarketplaceV2API # type: ignore[import-untyped]
18+
from scaleway.instance.v1.custom_api import InstanceUtilsV1API
19+
from scaleway.marketplace.v2 import MarketplaceV2API
2120

2221
from runner_manager.backend.base import BaseBackend
2322
from runner_manager.models.backend import (
@@ -264,6 +263,52 @@ def create(self, runner: Runner) -> Runner:
264263
use_gateway = bool(self.instance_config.public_gateway_id)
265264
dynamic_ip_required = self.instance_config.enable_public_ip and not use_gateway
266265

266+
# Prepare volumes configuration
267+
# Create explicit boot volume with specified size to avoid default 10GB
268+
# Volume types:
269+
# - l_ssd: Local SSD storage (fast, can create raw volumes)
270+
# - sbs_volume: Block Storage (cannot create raw, must use base_snapshot from image)
271+
volumes_config = None
272+
273+
if not self.instance_config.volumes:
274+
volume_size_bytes = self.instance_config.volume_size_gb * 1000000000
275+
276+
if self.instance_config.volume_type == "l_ssd":
277+
volumes_config = {
278+
"0": VolumeServerTemplate(
279+
name=f"{runner.name}-boot",
280+
size=volume_size_bytes,
281+
volume_type="l_ssd",
282+
)
283+
}
284+
log.info(
285+
f"Creating l_ssd boot volume: {self.instance_config.volume_size_gb}GB"
286+
)
287+
else:
288+
img = self.get_image(self.instance_config.image)
289+
if img.root_volume and img.root_volume.id:
290+
volumes_config = {
291+
"0": VolumeServerTemplate(
292+
name=f"{runner.name}-boot",
293+
size=volume_size_bytes,
294+
volume_type="sbs_volume",
295+
base_snapshot=img.root_volume.id,
296+
boot=True,
297+
)
298+
}
299+
log.info(
300+
f"Creating sbs_volume boot volume: {self.instance_config.volume_size_gb}GB "
301+
f"from snapshot {img.root_volume.id}"
302+
)
303+
else:
304+
log.warning(
305+
f"Image {img.id} has no root_volume, using default volume from image"
306+
)
307+
volumes_config = None
308+
else:
309+
# Use user-provided volumes configuration
310+
volumes_config = self.instance_config.volumes
311+
267312
# Create server using _create_server
268313
# Note: In SDK 2.10.3, 'protected' is a required parameter
269314
response = self.client._create_server(
@@ -278,6 +323,7 @@ def create(self, runner: Runner) -> Runner:
278323
project=self.config.project_id,
279324
organization=self.config.organization_id,
280325
security_group=security_group,
326+
volumes=volumes_config,
281327
)
282328

283329
server = response.server
@@ -391,6 +437,10 @@ def delete(self, runner: Runner) -> int:
391437
log.info(f"Server {runner.instance_id} deleted successfully")
392438

393439
# Delete associated volumes
440+
# Note: The behavior differs by volume type:
441+
# - l_ssd (local storage): Usually auto-deleted with the server
442+
# - sbs_volume (block storage): Persists after server deletion, must be deleted manually
443+
# The Instance API manages both types, but sbs volumes need explicit cleanup
394444
for volume_id in volume_ids:
395445
try:
396446
self.client.delete_volume(
@@ -399,7 +449,15 @@ def delete(self, runner: Runner) -> int:
399449
)
400450
log.info(f"Volume {volume_id} deleted successfully")
401451
except Exception as vol_error:
402-
log.warning(f"Failed to delete volume {volume_id}: {vol_error}")
452+
error_msg = str(vol_error)
453+
# Volume may already be deleted automatically (especially l_ssd volumes)
454+
# or might not be found if searching in wrong scope
455+
if "404" in error_msg or "not_found" in error_msg.lower():
456+
log.info(
457+
f"Volume {volume_id} not found - may have been auto-deleted with server or already cleaned up"
458+
)
459+
else:
460+
log.warning(f"Failed to delete volume {volume_id}: {vol_error}")
403461

404462
except Exception as e:
405463
if "404" in str(e) or "not found" in str(e).lower():

runner_manager/models/backend.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,3 +330,7 @@ class ScalewayInstanceConfig(InstanceConfig):
330330
boot_type: str = "local"
331331
volumes: Dict[str, str] = {}
332332
tags: List[str] = []
333+
volume_size_gb: int = 20 # Size of boot volume in GB
334+
volume_type: Literal["l_ssd", "sbs_volume"] = (
335+
"sbs_volume" # Local or block storage (sbs_volume is more universal)
336+
)

tests/unit/backend/test_scaleway.py

Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,74 @@ def mock_wait(self, server_id, target_state, timeout=300):
443443
assert result == 1
444444

445445

446+
def test_delete_with_volume_not_found_404(
447+
scaleway_runner, fake_scaleway_group, caplog, monkeypatch
448+
):
449+
"""Test instance deletion when volume returns 404 error."""
450+
backend = fake_scaleway_group.backend
451+
scaleway_runner.instance_id = "test-server-id"
452+
scaleway_runner.save()
453+
454+
# Mock server with volumes
455+
mock_volume = MagicMock()
456+
mock_volume.id = "test-volume-id"
457+
mock_server = MagicMock()
458+
mock_server.id = "test-server-id"
459+
mock_server.state = ServerState.RUNNING
460+
mock_server.volumes = {"0": mock_volume}
461+
462+
mock_client = backend.client
463+
mock_client.get_server.return_value = MagicMock(server=mock_server)
464+
mock_client.delete_volume.side_effect = Exception("Error 404: Volume not found")
465+
466+
# Restore wait_for_server_state mock
467+
def mock_wait(self, server_id, target_state, timeout=300):
468+
return mock_server
469+
470+
monkeypatch.setattr(ScalewayBackend, "wait_for_server_state", mock_wait)
471+
472+
result = backend.delete(scaleway_runner)
473+
474+
# Verify info log for not_found volume (not warning)
475+
assert "not found - may have been auto-deleted" in caplog.text
476+
assert "Failed to delete volume" not in caplog.text
477+
assert result == 1
478+
479+
480+
def test_delete_with_volume_not_found_string(
481+
scaleway_runner, fake_scaleway_group, caplog, monkeypatch
482+
):
483+
"""Test instance deletion when volume returns 'not_found' in error message."""
484+
backend = fake_scaleway_group.backend
485+
scaleway_runner.instance_id = "test-server-id"
486+
scaleway_runner.save()
487+
488+
# Mock server with volumes
489+
mock_volume = MagicMock()
490+
mock_volume.id = "test-volume-id"
491+
mock_server = MagicMock()
492+
mock_server.id = "test-server-id"
493+
mock_server.state = ServerState.RUNNING
494+
mock_server.volumes = {"0": mock_volume}
495+
496+
mock_client = backend.client
497+
mock_client.get_server.return_value = MagicMock(server=mock_server)
498+
mock_client.delete_volume.side_effect = Exception("resource_not_found")
499+
500+
# Restore wait_for_server_state mock
501+
def mock_wait(self, server_id, target_state, timeout=300):
502+
return mock_server
503+
504+
monkeypatch.setattr(ScalewayBackend, "wait_for_server_state", mock_wait)
505+
506+
result = backend.delete(scaleway_runner)
507+
508+
# Verify info log for not_found volume (not warning)
509+
assert "not found - may have been auto-deleted" in caplog.text
510+
assert "Failed to delete volume" not in caplog.text
511+
assert result == 1
512+
513+
446514
def test_delete_stopped_server(scaleway_runner, fake_scaleway_group):
447515
"""Test deletion of already stopped server."""
448516
backend = fake_scaleway_group.backend
@@ -606,3 +674,171 @@ def test_list(scaleway_runner, scaleway_group):
606674
scaleway_group.backend.delete(runner)
607675
with pytest.raises(NotFoundError):
608676
scaleway_group.backend.get(runner.instance_id)
677+
678+
679+
def test_create_with_default_sbs_volume(
680+
scaleway_runner, fake_scaleway_group, monkeypatch, caplog
681+
):
682+
"""Test instance creation with default sbs_volume configuration."""
683+
# Mock image with root_volume
684+
mock_image = MagicMock()
685+
mock_image.id = "test-image-id"
686+
mock_root_volume = MagicMock()
687+
mock_root_volume.id = "snapshot-id"
688+
mock_image.root_volume = mock_root_volume
689+
690+
# Patch get_image at class level
691+
def mock_get_image(self, image_name):
692+
return mock_image
693+
694+
monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
695+
696+
backend = fake_scaleway_group.backend
697+
backend.create(scaleway_runner)
698+
699+
# Verify volumes parameter was passed to _create_server
700+
mock_client = backend.client
701+
create_call = mock_client._create_server.call_args
702+
volumes = create_call.kwargs.get("volumes")
703+
704+
assert volumes is not None
705+
assert "0" in volumes
706+
assert volumes["0"].volume_type == "sbs_volume"
707+
assert volumes["0"].size == 20_000_000_000 # 20GB default
708+
assert volumes["0"].base_snapshot == "snapshot-id"
709+
710+
# Verify log message
711+
assert "Creating sbs_volume boot volume: 20GB" in caplog.text
712+
assert "from snapshot snapshot-id" in caplog.text
713+
714+
715+
def test_create_with_l_ssd_volume(
716+
scaleway_runner, fake_scaleway_group, monkeypatch, caplog
717+
):
718+
"""Test instance creation with l_ssd volume type."""
719+
# Mock image
720+
mock_image = MagicMock()
721+
mock_image.id = "test-image-id"
722+
723+
# Patch get_image at class level
724+
def mock_get_image(self, image_name):
725+
return mock_image
726+
727+
monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
728+
729+
# Configure l_ssd
730+
fake_scaleway_group.backend.instance_config.volume_type = "l_ssd"
731+
fake_scaleway_group.backend.instance_config.volume_size_gb = 80
732+
733+
backend = fake_scaleway_group.backend
734+
backend.create(scaleway_runner)
735+
736+
# Verify volumes parameter
737+
mock_client = backend.client
738+
create_call = mock_client._create_server.call_args
739+
volumes = create_call.kwargs.get("volumes")
740+
741+
assert volumes is not None
742+
assert "0" in volumes
743+
assert volumes["0"].volume_type == "l_ssd"
744+
assert volumes["0"].size == 80_000_000_000 # 80GB
745+
# For l_ssd, no base_snapshot should be set
746+
assert (
747+
not hasattr(volumes["0"], "base_snapshot") or volumes["0"].base_snapshot is None
748+
)
749+
750+
# Verify log message
751+
assert "Creating l_ssd boot volume: 80GB" in caplog.text
752+
753+
754+
def test_create_with_custom_volume_size(
755+
scaleway_runner, fake_scaleway_group, monkeypatch
756+
):
757+
"""Test instance creation with custom volume size."""
758+
mock_image = MagicMock()
759+
mock_image.id = "test-image-id"
760+
mock_root_volume = MagicMock()
761+
mock_root_volume.id = "snapshot-id"
762+
mock_image.root_volume = mock_root_volume
763+
764+
# Patch get_image at class level
765+
def mock_get_image(self, image_name):
766+
return mock_image
767+
768+
monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
769+
770+
# Set custom size
771+
fake_scaleway_group.backend.instance_config.volume_size_gb = 100
772+
backend = fake_scaleway_group.backend
773+
774+
backend.create(scaleway_runner)
775+
776+
mock_client = backend.client
777+
create_call = mock_client._create_server.call_args
778+
volumes = create_call.kwargs.get("volumes")
779+
780+
assert volumes["0"].size == 100_000_000_000 # 100GB
781+
782+
783+
def test_create_with_no_root_volume_fallback(
784+
scaleway_runner, fake_scaleway_group, monkeypatch, caplog
785+
):
786+
"""Test fallback when image has no root_volume."""
787+
mock_image = MagicMock()
788+
mock_image.id = "test-image-id"
789+
mock_image.root_volume = None # No root volume
790+
791+
# Patch get_image at class level
792+
def mock_get_image(self, image_name):
793+
return mock_image
794+
795+
monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
796+
797+
backend = fake_scaleway_group.backend
798+
backend.create(scaleway_runner)
799+
800+
# Verify warning was logged
801+
assert "has no root_volume, using default volume from image" in caplog.text
802+
803+
# Verify volumes=None was passed
804+
mock_client = backend.client
805+
create_call = mock_client._create_server.call_args
806+
volumes = create_call.kwargs.get("volumes")
807+
assert volumes is None
808+
809+
810+
def test_create_with_user_provided_volumes(
811+
scaleway_runner, fake_scaleway_group, monkeypatch
812+
):
813+
"""Test instance creation with user-provided volumes configuration."""
814+
from scaleway.instance.v1 import VolumeServerTemplate
815+
816+
# Mock image
817+
mock_image = MagicMock()
818+
mock_image.id = "test-image-id"
819+
820+
# Patch get_image at class level
821+
def mock_get_image(self, image_name):
822+
return mock_image
823+
824+
monkeypatch.setattr(ScalewayBackend, "get_image", mock_get_image)
825+
826+
# Set user-provided volumes
827+
custom_volumes = {
828+
"0": VolumeServerTemplate(
829+
volume_type="sbs_volume",
830+
size=50_000_000_000,
831+
base_snapshot="custom-snapshot-id",
832+
)
833+
}
834+
fake_scaleway_group.backend.instance_config.volumes = custom_volumes
835+
836+
backend = fake_scaleway_group.backend
837+
backend.create(scaleway_runner)
838+
839+
# Verify user-provided volumes were used
840+
mock_client = backend.client
841+
create_call = mock_client._create_server.call_args
842+
volumes = create_call.kwargs.get("volumes")
843+
844+
assert volumes == custom_volumes

0 commit comments

Comments
 (0)