Open
Description
As an alternative to the option in #318 (for pulling from a private container registry) I'd like to
- Create a User Assigned Managed Identity
- Grant that managed identity permission to pull from a private Azure Container Registry
- Use that Managed Identity as an identify for the Dask scheduler and workers
I could add an argument dedicated to that. But maybe we offer a general extra_vm_options
argument? At a glance, this seems to work
diff --git a/dask_cloudprovider/azure/azurevm.py b/dask_cloudprovider/azure/azurevm.py
index a2accbb..8111fcb 100644
--- a/dask_cloudprovider/azure/azurevm.py
+++ b/dask_cloudprovider/azure/azurevm.py
@@ -48,6 +48,7 @@ class AzureVM(VMInterface):
extra_bootstrap=None,
auto_shutdown: bool = None,
marketplace_plan: dict = {},
+ extra_vm_options: Optional[dict] = None,
**kwargs,
):
super().__init__(*args, **kwargs)
@@ -71,6 +72,7 @@ class AzureVM(VMInterface):
self.auto_shutdown = auto_shutdown
self.env_vars = env_vars
self.marketplace_plan = marketplace_plan
+ self.extra_vm_options = extra_vm_options or {}
async def create_vm(self):
[subnet_info, *_] = await self.cluster.call_async(
@@ -179,6 +181,13 @@ class AzureVM(VMInterface):
vm_parameters["storage_profile"]["image_reference"]["version"] = "latest"
self.cluster._log("Using Marketplace VM image with a Plan")
+ repeated = self.extra_vm_options.keys() & vm_parameters.keys()
+ if repeated:
+ raise TypeError(
+ f"Parameters are passed in both 'extra_vm_options' and as regular parameters: {repeated}"
+ )
+ vm_parameters = {**self.extra_vm_options, **vm_parameters}
+
self.cluster._log("Creating VM")
if self.cluster.debug:
self.cluster._log(
@@ -472,6 +481,7 @@ class AzureVMCluster(VMCluster):
debug: bool = False,
marketplace_plan: dict = {},
subscription_id: Optional[str] = None,
+ extra_vm_options: Optional[dict] = None,
**kwargs,
):
self.config = ClusterConfig(dask.config.get("cloudprovider.azure", {}))
@@ -550,7 +560,7 @@ class AzureVMCluster(VMCluster):
"""To create a virtual machine from Marketplace image or a custom image sourced
from a Marketplace image with a plan, all 3 fields 'name', 'publisher' and 'product' must be passed."""
)
-
+ self.extra_vm_options = extra_vm_options or {}
self.options = {
"cluster": self,
"config": self.config,
@@ -563,6 +573,7 @@ class AzureVMCluster(VMCluster):
"auto_shutdown": self.auto_shutdown,
"docker_image": self.docker_image,
"marketplace_plan": self.marketplace_plan,
+ "extra_vm_options": self.extra_vm_options,
}
self.scheduler_options = {
"vm_size": self.scheduler_vm_size,
Then I can run
import os
import azure.identity
import dask_cloudprovider.azure
import azure.mgmt.compute.models
subscription_id = os.environ["DASK_CLOUDPROVIDER__AZURE__SUBSCRIPTION_ID"]
rg_name = os.environ["DASK_CLOUDPROVIDER__AZURE__RESOURCE_GROUP"]
identity_name = "dask-cloudprovider-identity"
v = azure.mgmt.compute.models.UserAssignedIdentitiesValue()
user_assigned_identities = {
f"/subscriptions/{subscription_id}/resourcegroups/{rg_name}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/{identity_name}": v
}
identity = azure.mgmt.compute.models.VirtualMachineIdentity(
type="UserAssigned",
user_assigned_identities=user_assigned_identities
)
cluster = dask_cloudprovider.azure.AzureVMCluster(extra_vm_options={"identity": identity.as_dict()}, debug=True)
cluster.scale(1)
And get my managed identity on the VM:
I'll do a bit more testing, but I wanted to confirm that extra_vm_options
is preferred to adding a specific identity
parameter to __init__
.