From 9624d22ca2c113bb960d4e6ae29bac54a67c6e05 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 19 May 2025 16:57:54 -0400 Subject: [PATCH] host volumes: -force flag for delete When a node is garbage collected, we leave behind the dynamic host volume in the state store. We don't want to automatically garbage collect the volumes and risk data loss, but we should allow these to be removed via the API. Fixes: https://github.com/hashicorp/nomad/issues/25762 Fixes: https://hashicorp.atlassian.net/browse/NMD-705 --- .changelog/25902.txt | 3 +++ api/host_volumes.go | 6 +++++- command/volume_delete.go | 20 +++++++++++++------ nomad/host_volume_endpoint.go | 9 ++++++++- nomad/host_volume_endpoint_test.go | 15 +++++++++++--- nomad/structs/host_volumes.go | 1 + .../content/docs/commands/volume/delete.mdx | 4 ++++ 7 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 .changelog/25902.txt diff --git a/.changelog/25902.txt b/.changelog/25902.txt new file mode 100644 index 00000000000..5e3d4e3bca0 --- /dev/null +++ b/.changelog/25902.txt @@ -0,0 +1,3 @@ +```release-note:improvement +host volumes: Add -force flag to volume delete command for removing volumes from GC'd nodes +``` diff --git a/api/host_volumes.go b/api/host_volumes.go index 591cee1d245..f43b6eaa46b 100644 --- a/api/host_volumes.go +++ b/api/host_volumes.go @@ -174,7 +174,8 @@ type HostVolumeListRequest struct { } type HostVolumeDeleteRequest struct { - ID string + ID string + Force bool } type HostVolumeDeleteResponse struct{} @@ -244,6 +245,9 @@ func (hv *HostVolumes) Delete(req *HostVolumeDeleteRequest, opts *WriteOptions) if err != nil { return nil, nil, err } + if req.Force { + path = path + "?force=true" + } wm, err := hv.client.delete(path, nil, resp, opts) return resp, wm, err } diff --git a/command/volume_delete.go b/command/volume_delete.go index 6098d432984..6718c8e9e7c 100644 --- a/command/volume_delete.go +++ b/command/volume_delete.go @@ -29,9 +29,9 @@ Usage: nomad volume delete 
[options] unpublished. If the volume no longer exists, this command will silently return without an error. - When ACLs are enabled, this command requires a token with the - 'csi-write-volume' and 'csi-read-volume' capabilities for the volume's - namespace. + When ACLs are enabled, this command requires a token with the appropriate + capability in the volume's namespace: the 'csi-write-volume' capability for + CSI volumes or 'host-volume-delete' for dynamic host volumes. General Options: @@ -39,6 +39,11 @@ General Options: Delete Options: + -force + Delete the volume from the Nomad state store if the node has been garbage + collected. You should only use -force if the node will never rejoin the + cluster. Only available for dynamic host volumes. + -secret Secrets to pass to the plugin to delete the snapshot. Accepts multiple flags in the form -secret key=value. Only available for CSI volumes. @@ -88,10 +93,12 @@ func (c *VolumeDeleteCommand) Name() string { return "volume delete" } func (c *VolumeDeleteCommand) Run(args []string) int { var secretsArgs flaghelper.StringFlag var typeArg string + var force bool flags := c.Meta.FlagSet(c.Name(), FlagSetClient) flags.Usage = func() { c.Ui.Output(c.Help()) } flags.Var(&secretsArgs, "secret", "secrets for snapshot, ex. 
-secret key=value") flags.StringVar(&typeArg, "type", "csi", "type of volume (csi or host)") + flags.BoolVar(&force, "force", false, "force delete from garbage collected node") if err := flags.Parse(args); err != nil { c.Ui.Error(fmt.Sprintf("Error parsing arguments %s", err)) @@ -118,7 +125,7 @@ func (c *VolumeDeleteCommand) Run(args []string) int { case "csi": return c.deleteCSIVolume(client, volID, secretsArgs) case "host": - return c.deleteHostVolume(client, volID) + return c.deleteHostVolume(client, volID, force) default: c.Ui.Error(fmt.Sprintf("No such volume type %q", typeArg)) return 1 @@ -174,7 +181,7 @@ func (c *VolumeDeleteCommand) deleteCSIVolume(client *api.Client, volID string, return 0 } -func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) int { +func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string, force bool) int { if !helper.IsUUID(volID) { stub, possible, err := getHostVolumeByPrefix(client, volID, c.namespace) @@ -195,7 +202,8 @@ func (c *VolumeDeleteCommand) deleteHostVolume(client *api.Client, volID string) c.namespace = stub.Namespace } - _, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ID: volID}, nil) + _, _, err := client.HostVolumes().Delete(&api.HostVolumeDeleteRequest{ + ID: volID, Force: force}, nil) if err != nil { c.Ui.Error(fmt.Sprintf("Error deleting volume: %s", err)) return 1 diff --git a/nomad/host_volume_endpoint.go b/nomad/host_volume_endpoint.go index 1558b957385..bcfebe54cb8 100644 --- a/nomad/host_volume_endpoint.go +++ b/nomad/host_volume_endpoint.go @@ -671,7 +671,14 @@ func (v *HostVolume) Delete(args *structs.HostVolumeDeleteRequest, reply *struct // serialize client RPC and raft write per volume ID index, err := v.serializeCall(vol.ID, "delete", func() (uint64, error) { if err := v.deleteVolume(vol); err != nil { - return 0, err + if structs.IsErrUnknownNode(err) { + if !args.Force { + return 0, fmt.Errorf( + "volume cannot be removed from 
unknown node without force=true") + } + } else { + return 0, err + } } _, idx, err := v.srv.raftApply(structs.HostVolumeDeleteRequestType, args) if err != nil { diff --git a/nomad/host_volume_endpoint_test.go b/nomad/host_volume_endpoint_test.go index a73f8cce66f..6a46c83b9d2 100644 --- a/nomad/host_volume_endpoint_test.go +++ b/nomad/host_volume_endpoint_test.go @@ -392,16 +392,25 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) { must.Nil(t, getResp.Volume) }) + index++ + must.NoError(t, srv.State().DeleteNode(structs.MsgTypeTestSetup, index, []string{vol1.NodeID})) + // delete vol1 to finish cleaning up - var delResp structs.HostVolumeDeleteResponse - err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", &structs.HostVolumeDeleteRequest{ + delReq := &structs.HostVolumeDeleteRequest{ VolumeID: vol1.ID, WriteRequest: structs.WriteRequest{ Region: srv.Region(), Namespace: vol1.Namespace, AuthToken: powerToken, }, - }, &delResp) + } + + var delResp structs.HostVolumeDeleteResponse + err := msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) + must.EqError(t, err, "volume cannot be removed from unknown node without force=true") + + delReq.Force = true + err = msgpackrpc.CallWithCodec(codec, "HostVolume.Delete", delReq, &delResp) must.NoError(t, err) // should be no volumes left diff --git a/nomad/structs/host_volumes.go b/nomad/structs/host_volumes.go index f5ed570066a..81956954a13 100644 --- a/nomad/structs/host_volumes.go +++ b/nomad/structs/host_volumes.go @@ -401,6 +401,7 @@ type HostVolumeRegisterResponse struct { type HostVolumeDeleteRequest struct { VolumeID string + Force bool WriteRequest } diff --git a/website/content/docs/commands/volume/delete.mdx b/website/content/docs/commands/volume/delete.mdx index 4d43de0b427..88dc52f2edf 100644 --- a/website/content/docs/commands/volume/delete.mdx +++ b/website/content/docs/commands/volume/delete.mdx @@ -39,6 +39,10 @@ volumes or `host-volume-delete` for dynamic host 
volumes. ## Delete options +- `-force`: Delete the volume from the Nomad state store if the node has been + garbage collected. You should only use `-force` if the node will never rejoin + the cluster. Only available for dynamic host volumes. + - `-secret`: Secrets to pass to the plugin to delete the snapshot. Accepts multiple flags in the form `-secret key=value`. Only available for CSI volumes.