
Commit e19fbfa

host volumes: add configuration to GC on node GC
When a node is garbage collected, any dynamic host volumes on the node are orphaned in the state store. We generally don't want to collect these volumes automatically and risk data loss, and have provided a CLI flag to `-force` remove them in #25902. But for clusters running on ephemeral cloud instances (ex. AWS EC2 in an autoscaling group), having to remove these host volumes manually may add excessive friction. Add a knob to the client configuration that removes a node's host volumes from the state store when the node is garbage collected.

Ref: #25902
Ref: #25762
Ref: https://hashicorp.atlassian.net/browse/NMD-705
1 parent 456d95a · commit e19fbfa
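A minimal sketch of how an operator might opt a client into this behavior, based on the `gc_volumes_on_node_gc` attribute added to the client config in this commit; the surrounding attributes and their values are illustrative placeholders only:

```hcl
# Illustrative Nomad client agent configuration. Only gc_volumes_on_node_gc is
# introduced by this commit; the other attributes are existing knobs shown for
# context with placeholder values.
client {
  enabled = true

  # Existing garbage collection tuning.
  gc_interval   = "1m"
  gc_max_allocs = 50

  # New in this commit: when the servers garbage collect this node, also delete
  # any dynamic host volumes registered on it from the state store. Only set
  # this if a GC'd node can never come back (e.g. ephemeral instances in an
  # autoscaling group).
  gc_volumes_on_node_gc = true
}
```

Because the option defaults to false and only merges when set to true (see the `Merge` change in `command/agent/config.go` below), leaving it unset preserves the existing behavior of keeping orphaned volumes until they are `-force` removed.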

File tree: 15 files changed, +92 −7 lines changed

.changelog/25903.txt

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+```release-note:improvement
+client: Add gc_volumes_on_node_gc configuration to delete host volumes when nodes are garbage collected
+```

api/nodes.go

Lines changed: 1 addition & 0 deletions
@@ -566,6 +566,7 @@ type Node struct {
 	Events               []*NodeEvent
 	Drivers              map[string]*DriverInfo
 	HostVolumes          map[string]*HostVolumeInfo
+	GCVolumesOnNodeGC    bool
 	HostNetworks         map[string]*HostNetworkInfo
 	CSIControllerPlugins map[string]*CSIInfo
 	CSINodePlugins       map[string]*CSIInfo

client/client.go

Lines changed: 2 additions & 0 deletions
@@ -1595,6 +1595,8 @@ func (c *Client) setupNode() error {
 			node.HostVolumes[k] = v.Copy()
 		}
 	}
+	node.GCVolumesOnNodeGC = newConfig.GCVolumesOnNodeGC
+
 	if node.HostNetworks == nil {
 		if l := len(newConfig.HostNetworks); l != 0 {
 			node.HostNetworks = make(map[string]*structs.ClientHostNetworkConfig, l)

client/config/config.go

Lines changed: 5 additions & 0 deletions
@@ -242,6 +242,11 @@ type Config struct {
 	// before garbage collection is triggered.
 	GCMaxAllocs int
 
+	// GCVolumesOnNodeGC indicates that the server should GC any dynamic host
+	// volumes on this node when the node is GC'd. This should only be set if
+	// you know that a GC'd node can never come back
+	GCVolumesOnNodeGC bool
+
 	// NoHostUUID disables using the host's UUID and will force generation of a
 	// random UUID.
 	NoHostUUID bool

command/agent/agent.go

Lines changed: 2 additions & 0 deletions
@@ -946,6 +946,8 @@ func convertClientConfig(agentConfig *Config) (*clientconfig.Config, error) {
 	conf.GCDiskUsageThreshold = agentConfig.Client.GCDiskUsageThreshold
 	conf.GCInodeUsageThreshold = agentConfig.Client.GCInodeUsageThreshold
 	conf.GCMaxAllocs = agentConfig.Client.GCMaxAllocs
+	conf.GCVolumesOnNodeGC = agentConfig.Client.GCVolumesOnNodeGC
+
 	if agentConfig.Client.NoHostUUID != nil {
 		conf.NoHostUUID = *agentConfig.Client.NoHostUUID
 	} else {

command/agent/config.go

Lines changed: 8 additions & 0 deletions
@@ -341,6 +341,11 @@ type ClientConfig struct {
 	// before garbage collection is triggered.
 	GCMaxAllocs int `hcl:"gc_max_allocs"`
 
+	// GCVolumesOnNodeGC indicates that the server should GC any dynamic host
+	// volumes on this node when the node is GC'd. This should only be set if
+	// you know that a GC'd node can never come back
+	GCVolumesOnNodeGC bool `hcl:"gc_volumes_on_node_gc"`
+
 	// NoHostUUID disables using the host's UUID and will force generation of a
 	// random UUID.
 	NoHostUUID *bool `hcl:"no_host_uuid"`

@@ -2543,6 +2548,9 @@ func (a *ClientConfig) Merge(b *ClientConfig) *ClientConfig {
 	if b.GCMaxAllocs != 0 {
 		result.GCMaxAllocs = b.GCMaxAllocs
 	}
+	if b.GCVolumesOnNodeGC {
+		result.GCVolumesOnNodeGC = b.GCVolumesOnNodeGC
+	}
 	// NoHostUUID defaults to true, merge if false
 	if b.NoHostUUID != nil {
 		result.NoHostUUID = b.NoHostUUID

command/agent/config_parse_test.go

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ var basicConfig = &Config{
 		GCDiskUsageThreshold:  82,
 		GCInodeUsageThreshold: 91,
 		GCMaxAllocs:           50,
+		GCVolumesOnNodeGC:     true,
 		NoHostUUID:            pointer.Of(false),
 		DisableRemoteExec:     true,
 		HostVolumes: []*structs.ClientHostVolumeConfig{

command/agent/testdata/basic.hcl

Lines changed: 1 addition & 0 deletions
@@ -95,6 +95,7 @@ client {
   gc_disk_usage_threshold  = 82
   gc_inode_usage_threshold = 91
   gc_max_allocs            = 50
+  gc_volumes_on_node_gc    = true
   no_host_uuid             = false
   disable_remote_exec      = true

command/agent/testdata/basic.json

Lines changed: 1 addition & 0 deletions
@@ -94,6 +94,7 @@
     "gc_interval": "6s",
     "gc_max_allocs": 50,
     "gc_parallel_destroys": 6,
+    "gc_volumes_on_node_gc": true,
     "host_volume": [
       {
         "tmp": [

nomad/state/events_test.go

Lines changed: 2 additions & 2 deletions
@@ -821,7 +821,7 @@ func TestNodeEventsFromChanges(t *testing.T) {
 				return upsertNodeTxn(tx, tx.Index, testNode())
 			},
 			Mutate: func(s *StateStore, tx *txn) error {
-				return deleteNodeTxn(tx, tx.Index, []string{testNodeID()})
+				return s.deleteNodeTxn(tx, tx.Index, []string{testNodeID()})
 			},
 			WantEvents: []structs.Event{{
 				Topic: structs.TopicNode,

@@ -842,7 +842,7 @@ func TestNodeEventsFromChanges(t *testing.T) {
 				return upsertNodeTxn(tx, tx.Index, testNode(nodeIDTwo))
 			},
 			Mutate: func(s *StateStore, tx *txn) error {
-				return deleteNodeTxn(tx, tx.Index, []string{testNodeID(), testNodeIDTwo()})
+				return s.deleteNodeTxn(tx, tx.Index, []string{testNodeID(), testNodeIDTwo()})
 			},
 			WantEvents: []structs.Event{
 				{
