diff --git a/api/nvidia.com/resource/v1beta1/computedomain.go b/api/nvidia.com/resource/v1beta1/computedomain.go index ea48caa43..a05807f4e 100644 --- a/api/nvidia.com/resource/v1beta1/computedomain.go +++ b/api/nvidia.com/resource/v1beta1/computedomain.go @@ -91,4 +91,11 @@ type ComputeDomainNode struct { Name string `json:"name"` IPAddress string `json:"ipAddress"` CliqueID string `json:"cliqueID"` + // The Index field is used to ensure a consistent IP-to-DNS name + // mapping across all machines within an IMEX domain. Each node's index + // directly determines its DNS name. It is marked as optional (but not + // omitempty) in order to support downgrades and avoid an API bump. + // +optional + // +kubebuilder:validation:Optional + Index int `json:"index"` } diff --git a/cmd/compute-domain-controller/controller.go b/cmd/compute-domain-controller/controller.go index 3f5893dc7..b507c9ff4 100644 --- a/cmd/compute-domain-controller/controller.go +++ b/cmd/compute-domain-controller/controller.go @@ -38,6 +38,9 @@ type ManagerConfig struct { // imageName is the full image name to use when rendering templates imageName string + // maxNodesPerIMEXDomain is the maximum number of nodes per IMEX domain to allocate + maxNodesPerIMEXDomain int + // clientsets provides access to various Kubernetes API client interfaces clientsets flags.ClientSets @@ -67,12 +70,13 @@ func (c *Controller) Run(ctx context.Context) error { workQueue := workqueue.New(workqueue.DefaultControllerRateLimiter()) managerConfig := &ManagerConfig{ - driverName: c.config.driverName, - driverNamespace: c.config.flags.namespace, - additionalNamespaces: c.config.flags.additionalNamespaces.Value(), - imageName: c.config.flags.imageName, - clientsets: c.config.clientsets, - workQueue: workQueue, + driverName: c.config.driverName, + driverNamespace: c.config.flags.namespace, + additionalNamespaces: c.config.flags.additionalNamespaces.Value(), + imageName: c.config.flags.imageName, + maxNodesPerIMEXDomain: 
c.config.flags.maxNodesPerIMEXDomain, + clientsets: c.config.clientsets, + workQueue: workQueue, } cdManager := NewComputeDomainManager(managerConfig) diff --git a/cmd/compute-domain-controller/daemonset.go b/cmd/compute-domain-controller/daemonset.go index 29e43aa44..600ac20cb 100644 --- a/cmd/compute-domain-controller/daemonset.go +++ b/cmd/compute-domain-controller/daemonset.go @@ -50,6 +50,7 @@ type DaemonSetTemplateData struct { ComputeDomainLabelValue types.UID ResourceClaimTemplateName string ImageName string + MaxNodesPerIMEXDomain int FeatureGates map[string]bool } @@ -200,6 +201,7 @@ func (m *DaemonSetManager) Create(ctx context.Context, cd *nvapi.ComputeDomain) ComputeDomainLabelValue: cd.UID, ResourceClaimTemplateName: rct.Name, ImageName: m.config.imageName, + MaxNodesPerIMEXDomain: m.config.maxNodesPerIMEXDomain, FeatureGates: featuregates.ToMap(), } diff --git a/cmd/compute-domain-controller/main.go b/cmd/compute-domain-controller/main.go index fb30eff33..411cb3b3f 100644 --- a/cmd/compute-domain-controller/main.go +++ b/cmd/compute-domain-controller/main.go @@ -44,6 +44,12 @@ import ( const ( DriverName = "compute-domain.nvidia.com" + + // This constant provides a reasonable default for the maximum size of + // a given IMEX Domain. On GB200 and GB300 the limit is 18, so we pick + // this for now. It can be overridden as an environment variable or + // command line argument as required. 
+ defaultMaxNodesPerIMEXDomain = 18 ) type Flags struct { @@ -51,9 +57,10 @@ type Flags struct { loggingConfig *flags.LoggingConfig featureGateConfig *flags.FeatureGateConfig - podName string - namespace string - imageName string + podName string + namespace string + imageName string + maxNodesPerIMEXDomain int httpEndpoint string metricsPath string @@ -103,6 +110,13 @@ func newApp() *cli.App { Destination: &flags.imageName, EnvVars: []string{"IMAGE_NAME"}, }, + &cli.IntFlag{ + Name: "max-nodes-per-imex-domain", + Usage: "The maximum number of possible nodes per IMEX domain", + Value: defaultMaxNodesPerIMEXDomain, + EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"}, + Destination: &flags.maxNodesPerIMEXDomain, + }, &cli.StringFlag{ Category: "HTTP server:", Name: "http-endpoint", diff --git a/cmd/compute-domain-daemon/computedomain.go b/cmd/compute-domain-daemon/computedomain.go index 8c847f5d0..643342912 100644 --- a/cmd/compute-domain-daemon/computedomain.go +++ b/cmd/compute-domain-daemon/computedomain.go @@ -218,9 +218,16 @@ func (m *ComputeDomainManager) UpdateComputeDomainNodeInfo(ctx context.Context, // If there isn't one, create one and append it to the list if nodeInfo == nil { + // Get the next available index for this new node + nextIndex, err := getNextAvailableIndex(newCD.Status.Nodes, m.config.maxNodesPerIMEXDomain) + if err != nil { + return fmt.Errorf("error getting next available index: %w", err) + } + nodeInfo = &nvapi.ComputeDomainNode{ Name: m.config.nodeName, CliqueID: m.config.cliqueID, + Index: nextIndex, } newCD.Status.Nodes = append(newCD.Status.Nodes, nodeInfo) } @@ -243,6 +250,46 @@ func (m *ComputeDomainManager) UpdateComputeDomainNodeInfo(ctx context.Context, return nil } +// The Index field in the Nodes section of the ComputeDomain status ensures a +// consistent IP-to-DNS name mapping across all machines within a given IMEX +// domain. 
Each node's index directly determines its DNS name using the format +// "compute-domain-daemon-{index}". +// +// getNextAvailableIndex finds the next available index for the current node by +// seeing which ones are already taken by other nodes in the ComputeDomain +// status. It fills in gaps where it can, and returns an error if no index is +// available within maxNodesPerIMEXDomain. +// +// By filling gaps in the index sequence (rather than always appending), we +// maintain stable DNS names for existing nodes even when intermediate nodes +// are removed from the compute domain and new ones are added. +func getNextAvailableIndex(nodes []*nvapi.ComputeDomainNode, maxNodesPerIMEXDomain int) (int, error) { + if len(nodes) >= maxNodesPerIMEXDomain { + return -1, fmt.Errorf("cannot add more nodes, already at maximum (%d)", maxNodesPerIMEXDomain) + } + + // Create a map to track used indices + usedIndices := make(map[int]bool) + + // Collect all currently used indices + for _, node := range nodes { + usedIndices[node.Index] = true + } + + // Find the next available index, starting from 0 and filling gaps + nextIndex := 0 + for usedIndices[nextIndex] { + nextIndex++ + } + + // Ensure nextIndex is within the range 0..maxNodesPerIMEXDomain + if nextIndex < 0 || nextIndex >= maxNodesPerIMEXDomain { + return -1, fmt.Errorf("no available indices within maxNodesPerIMEXDomain (%d)", maxNodesPerIMEXDomain) + } + + return nextIndex, nil +} + // If we've reached the expected number of nodes and if there was actually a // change compared to the previously known set of nodes: pass info to IMEX // daemon controller. 
diff --git a/cmd/compute-domain-daemon/controller.go b/cmd/compute-domain-daemon/controller.go index ec4a0c57b..aa4e5775e 100644 --- a/cmd/compute-domain-daemon/controller.go +++ b/cmd/compute-domain-daemon/controller.go @@ -35,6 +35,7 @@ type ManagerConfig struct { computeDomainNamespace string cliqueID string podIP string + maxNodesPerIMEXDomain int } // ControllerConfig holds the configuration for the controller. @@ -45,6 +46,7 @@ type ControllerConfig struct { computeDomainNamespace string cliqueID string podIP string + maxNodesPerIMEXDomain int } // Controller manages the lifecycle of compute domain operations. @@ -73,6 +75,7 @@ func NewController(config *ControllerConfig) (*Controller, error) { computeDomainNamespace: config.computeDomainNamespace, cliqueID: config.cliqueID, podIP: config.podIP, + maxNodesPerIMEXDomain: config.maxNodesPerIMEXDomain, } controller := &Controller{ diff --git a/cmd/compute-domain-daemon/dnsnames.go b/cmd/compute-domain-daemon/dnsnames.go new file mode 100644 index 000000000..027178033 --- /dev/null +++ b/cmd/compute-domain-daemon/dnsnames.go @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2025 NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
const (
	// hostsFilePath is the hosts file that receives the IP to DNS name mappings.
	hostsFilePath = "/etc/hosts"
	// dnsNamePrefix is the prefix shared by every compute-domain-daemon DNS name.
	dnsNamePrefix = "compute-domain-daemon-"
	// dnsNameFormat is the fmt format that turns a node index into a DNS name.
	dnsNameFormat = dnsNamePrefix + "%d"
)

// IPToDNSNameMap associates an IP address (key) with its DNS name (value).
type IPToDNSNameMap map[string]string

// DNSNameManager keeps track of the static DNS names assigned to IP addresses.
// The embedded mutex guards ipToDNSName.
type DNSNameManager struct {
	sync.Mutex
	// ipToDNSName caches the mappings most recently computed for this clique.
	ipToDNSName IPToDNSNameMap
	// cliqueID selects which nodes this manager produces mappings for.
	cliqueID string
	// maxNodesPerIMEXDomain is the exclusive upper bound for node indices.
	maxNodesPerIMEXDomain int
	// nodesConfigPath is where the static nodes config file is written.
	nodesConfigPath string
}

// NewDNSNameManager returns a DNSNameManager for the given clique with an
// empty set of IP to DNS name mappings.
func NewDNSNameManager(cliqueID string, maxNodesPerIMEXDomain int, nodesConfigPath string) *DNSNameManager {
	mgr := new(DNSNameManager)
	mgr.ipToDNSName = IPToDNSNameMap{}
	mgr.cliqueID = cliqueID
	mgr.maxNodesPerIMEXDomain = maxNodesPerIMEXDomain
	mgr.nodesConfigPath = nodesConfigPath
	return mgr
}
+func (m *DNSNameManager) UpdateDNSNameMappings(nodes []*nvapi.ComputeDomainNode) error { + m.Lock() + defer m.Unlock() + + // Make a local copy of the current ipToDNSName mappings + ipToDNSName := maps.Clone(m.ipToDNSName) + + // Prefilter nodes to only consider those with the matching cliqueID + var cliqueNodes []*nvapi.ComputeDomainNode + for _, node := range nodes { + if node.CliqueID == m.cliqueID { + cliqueNodes = append(cliqueNodes, node) + } + } + + // Find and remove stale IPs from map + currentIPs := make(map[string]bool) + for _, node := range cliqueNodes { + currentIPs[node.IPAddress] = true + } + for ip := range ipToDNSName { + if !currentIPs[ip] { + delete(ipToDNSName, ip) + } + } + + // Add new IPs to map + for _, node := range cliqueNodes { + // If IP already has a DNS name, skip it + if _, exists := ipToDNSName[node.IPAddress]; exists { + continue + } + + // Construct the DNS name from the node index + dnsName, err := m.constructDNSName(node) + if err != nil { + return fmt.Errorf("failed to allocate DNS name for IP %s: %w", node.IPAddress, err) + } + + // Assign the IP -> DNS name mapping + ipToDNSName[node.IPAddress] = dnsName + } + + // If the existing ipToDNSName mappings are unchanged, exit early + if maps.Equal(ipToDNSName, m.ipToDNSName) { + return nil + } + + // Otherwise, update the cached ipToDNSName mapping + m.ipToDNSName = ipToDNSName + + // And updated the hosts file with new mappings + return m.updateHostsFile() +} + +// LogDNSNameMappings logs the current compute-domain-daemon mappings from memory. +func (m *DNSNameManager) LogDNSNameMappings() { + m.Lock() + defer m.Unlock() + + if len(m.ipToDNSName) == 0 { + klog.Infof("Current compute-domain-daemon mappings: empty") + return + } + + klog.Infof("Current compute-domain-daemon mappings:") + for ip, dnsName := range m.ipToDNSName { + klog.Infof(" %s -> %s", ip, dnsName) + } +} + +// contructDNSName constructs a DNS name for a node based on its index field. 
+// Returns an error if the index is invalid or exceeds maxNodesPerIMEXDomain. +func (m *DNSNameManager) constructDNSName(node *nvapi.ComputeDomainNode) (string, error) { + if node.Index < 0 { + return "", fmt.Errorf("node %s has invalid index %d", node.Name, node.Index) + } + if node.Index >= m.maxNodesPerIMEXDomain { + return "", fmt.Errorf("node %s has invalid index %d, must be less than %d", node.Name, node.Index, m.maxNodesPerIMEXDomain) + } + dnsName := fmt.Sprintf(dnsNameFormat, node.Index) + return dnsName, nil +} + +// updateHostsFile updates the /etc/hosts file with current IP to DNS name mappings. +func (m *DNSNameManager) updateHostsFile() error { + // Read hosts file + hostsContent, err := os.ReadFile(hostsFilePath) + if err != nil { + return fmt.Errorf("failed to read %s: %w", hostsFilePath, err) + } + + // Grab any lines to preserve, skipping existing DNS name mappings + var preservedLines []string + for _, line := range strings.Split(string(hostsContent), "\n") { + line = strings.TrimSpace(line) + + // Skip existing compute-domain-daemon mappings + if strings.Contains(line, dnsNamePrefix) { + continue + } + + // Keep all other lines + preservedLines = append(preservedLines, line) + } + + // Add preserved lines + var newHostsContent strings.Builder + for _, line := range preservedLines { + newHostsContent.WriteString(line) + newHostsContent.WriteString("\n") + } + + // Add a separator comment + newHostsContent.WriteString("# Compute Domain Daemon mappings\n") + + // Add new DNS name mappings + for ip, dnsName := range m.ipToDNSName { + newHostsContent.WriteString(fmt.Sprintf("%s\t%s\n", ip, dnsName)) + } + + // Write the updated hosts file + if err := os.WriteFile(hostsFilePath, []byte(newHostsContent.String()), 0644); err != nil { + return fmt.Errorf("failed to write %s: %w", hostsFilePath, err) + } + + return nil +} + +// WriteNodesConfig creates a static nodes config file with DNS names. 
+func (m *DNSNameManager) WriteNodesConfig() error { + // Ensure the directory exists + dir := filepath.Dir(m.nodesConfigPath) + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + + // Create or overwrite the nodesConfig file + f, err := os.Create(m.nodesConfigPath) + if err != nil { + return fmt.Errorf("failed to create nodes config file: %w", err) + } + defer f.Close() + + // Write static DNS names + for i := 0; i < m.maxNodesPerIMEXDomain; i++ { + dnsName := fmt.Sprintf(dnsNameFormat, i) + if _, err := fmt.Fprintf(f, "%s\n", dnsName); err != nil { + return fmt.Errorf("failed to write to nodes config file: %w", err) + } + } + + klog.Infof("Created static nodes config file with %d DNS names using format %s", m.maxNodesPerIMEXDomain, dnsNameFormat) + + return nil +} diff --git a/cmd/compute-domain-daemon/main.go b/cmd/compute-domain-daemon/main.go index aa11110c4..eb37178c1 100644 --- a/cmd/compute-domain-daemon/main.go +++ b/cmd/compute-domain-daemon/main.go @@ -33,6 +33,7 @@ import ( "github.com/urfave/cli/v2" nvapi "github.com/NVIDIA/k8s-dra-driver-gpu/api/nvidia.com/resource/v1beta1" + "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/featuregates" "github.com/NVIDIA/k8s-dra-driver-gpu/pkg/flags" ) @@ -51,6 +52,7 @@ type Flags struct { computeDomainNamespace string nodeName string podIP string + maxNodesPerIMEXDomain int loggingConfig *flags.LoggingConfig featureGateConfig *flags.FeatureGateConfig } @@ -128,6 +130,12 @@ func newApp() *cli.App { EnvVars: []string{"POD_IP"}, Destination: &flags.podIP, }, + &cli.IntFlag{ + Name: "max-nodes-per-imex-domain", + Usage: "The maximum number of possible nodes per IMEX domain", + EnvVars: []string{"MAX_NODES_PER_IMEX_DOMAIN"}, + Destination: &flags.maxNodesPerIMEXDomain, + }, } cliFlags = append(cliFlags, flags.featureGateConfig.Flags()...) cliFlags = append(cliFlags, flags.loggingConfig.Flags()...) 
@@ -163,7 +171,6 @@ func newApp() *cli.App { // Run invokes the IMEX daemon and manages its lifecycle. func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { - // Support heterogeneous compute domain if flags.cliqueID == "" { fmt.Println("ClusterUUID and CliqueId are NOT set for GPUs on this node.") @@ -180,6 +187,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { computeDomainNamespace: flags.computeDomainNamespace, nodeName: flags.nodeName, podIP: flags.podIP, + maxNodesPerIMEXDomain: flags.maxNodesPerIMEXDomain, } klog.Infof("config: %v", config) @@ -189,6 +197,18 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { } // Prepare IMEX daemon process manager (not invoking the process yet). + var dnsNameManager *DNSNameManager + if featuregates.Enabled(featuregates.IMEXDaemonsWithDNSNames) { + // Prepare DNS name manager + dnsNameManager = NewDNSNameManager(flags.cliqueID, flags.maxNodesPerIMEXDomain, nodesConfigPath) + + // Create static nodes config file with DNS names + if err := dnsNameManager.WriteNodesConfig(); err != nil { + return fmt.Errorf("failed to create static nodes config: %w", err) + } + } + + // Prepare IMEX daemon process manager. daemonCommandLine := []string{imexBinaryPath, "-c", imexConfigPath} processManager := NewProcessManager(daemonCommandLine) @@ -210,14 +230,23 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { } }() - // Start IMEXDaemonUpdateLoop() in goroutine (watches for CD status - // changes, and restarts the IMEX daemon as needed). + // Start IMEX daemon update loop in goroutine (watches for CD status + // changes and manages IMEX daemon updates). 
wg.Add(1) go func() { defer wg.Done() - if err := IMEXDaemonUpdateLoop(ctx, controller, flags.cliqueID, processManager); err != nil { - klog.Errorf("IMEXDaemonUpdateLoop failed, initiate shutdown: %s", err) - cancel() + if featuregates.Enabled(featuregates.IMEXDaemonsWithDNSNames) { + // Use new DNS name-based functionality + if err := IMEXDaemonUpdateLoopWithDNSNames(ctx, controller, processManager, dnsNameManager); err != nil { + klog.Errorf("IMEXDaemonUpdateLoop failed, initiate shutdown: %s", err) + cancel() + } + } else { + // Use original IP-based functionality + if err := IMEXDaemonUpdateLoopWithIPs(ctx, controller, flags.cliqueID, processManager); err != nil { + klog.Errorf("IMEXDaemonUpdateLoop failed, initiate shutdown: %s", err) + cancel() + } } }() @@ -239,14 +268,14 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { return nil } -// IMEXDaemonUpdateLoop() reacts to ComputeDomain status changes by updating the +// IMEXDaemonUpdateLoopWithIPs reacts to ComputeDomain status changes by updating the // IMEX daemon nodes config file and (re)starting the IMEX daemon process. -func IMEXDaemonUpdateLoop(ctx context.Context, controller *Controller, cliqueID string, pm *ProcessManager) error { +func IMEXDaemonUpdateLoopWithIPs(ctx context.Context, controller *Controller, cliqueID string, pm *ProcessManager) error { for { klog.Infof("wait for nodes update") select { case <-ctx.Done(): - klog.Infof("shutdown: stop IMEXDaemonUpdateLoop") + klog.Infof("shutdown: stop IMEXDaemonUpdateLoopWithIPs") return nil case nodes := <-controller.GetNodesUpdateChan(): if err := writeNodesConfig(cliqueID, nodes); err != nil { @@ -263,6 +292,31 @@ func IMEXDaemonUpdateLoop(ctx context.Context, controller *Controller, cliqueID } } +// IMEXDaemonUpdateLoopWithDNSNames reacts to ComputeDomain status changes by +// updating the /etc/hosts file with IP to DNS name mappings. 
This relies on +// the IMEX daemon to pick up these changes automatically (and quickly) -- +// which it seems to do via grpc-based health-checking of individual +// connections. We only restart the IMEX daemon if it crashes (both +// unexpectedly and expectedly). +func IMEXDaemonUpdateLoopWithDNSNames(ctx context.Context, controller *Controller, processManager *ProcessManager, dnsNameManager *DNSNameManager) error { + for { + klog.Infof("wait for nodes update") + select { + case <-ctx.Done(): + klog.Infof("shutdown: stop IMEXDaemonUpdateLoopWithDNSNames") + return nil + case nodes := <-controller.GetNodesUpdateChan(): + if err := dnsNameManager.UpdateDNSNameMappings(nodes); err != nil { + return fmt.Errorf("failed to update DNS name => IP mappings: %w", err) + } + if err := processManager.EnsureStarted(); err != nil { + return fmt.Errorf("failed to ensure IMEX daemon is started: %w", err) + } + dnsNameManager.LogDNSNameMappings() + } + } +} + // check verifies if the node is IMEX capable and if so, checks if the IMEX daemon is ready. // It returns an error if any step fails. func check(ctx context.Context, cancel context.CancelFunc, flags *Flags) error { diff --git a/cmd/compute-domain-daemon/process.go b/cmd/compute-domain-daemon/process.go index 7704d1698..4a832087e 100644 --- a/cmd/compute-domain-daemon/process.go +++ b/cmd/compute-domain-daemon/process.go @@ -46,7 +46,7 @@ func NewProcessManager(cmd []string) *ProcessManager { return m } -// Restart() starts or restarts the process. +// Restart starts or restarts the process. func (m *ProcessManager) Restart() error { if m.handle != nil { if err := m.stop(); err != nil { @@ -56,6 +56,14 @@ func (m *ProcessManager) Restart() error { return m.start() } +// EnsureStarted starts the process if it is not already running. If the process is already started, this is a no-op. 
+func (m *ProcessManager) EnsureStarted() error { + if m.handle != nil { + return nil + } + return m.start() +} + func (m *ProcessManager) start() error { m.Lock() defer m.Unlock() diff --git a/deployments/helm/nvidia-dra-driver-gpu/crds/resource.nvidia.com_computedomains.yaml b/deployments/helm/nvidia-dra-driver-gpu/crds/resource.nvidia.com_computedomains.yaml index a0b7f3c00..edbc4ce98 100644 --- a/deployments/helm/nvidia-dra-driver-gpu/crds/resource.nvidia.com_computedomains.yaml +++ b/deployments/helm/nvidia-dra-driver-gpu/crds/resource.nvidia.com_computedomains.yaml @@ -85,6 +85,13 @@ spec: properties: cliqueID: type: string + index: + description: |- + The Index field is used to ensure a consistent IP-to-DNS name + mapping across all machines within an IMEX domain. Each node's index + directly determines its DNS name. It is marked as optional (but not + omitempty) in order to support downgrades and avoid an API bump. + type: integer ipAddress: type: string name: diff --git a/pkg/featuregates/featuregates.go b/pkg/featuregates/featuregates.go index aa4777e7b..ce113fb87 100644 --- a/pkg/featuregates/featuregates.go +++ b/pkg/featuregates/featuregates.go @@ -33,6 +33,9 @@ const ( // MPSSupport allows MPS (Multi-Process Service) settings to be specified. MPSSupport featuregate.Feature = "MPSSupport" + + // IMEXDaemonsWithDNSNames allows using DNS names instead of raw IPs for IMEX daemons. + IMEXDaemonsWithDNSNames featuregate.Feature = "IMEXDaemonsWithDNSNames" ) // FeatureGates is a singleton representing the set of all feature gates and their values. @@ -56,6 +59,13 @@ var defaultFeatureGates = map[featuregate.Feature]featuregate.VersionedSpecs{ Version: version.MajorMinor(25, 8), }, }, + IMEXDaemonsWithDNSNames: { + { + Default: false, + PreRelease: featuregate.Alpha, + Version: version.MajorMinor(25, 8), + }, + }, } // init instantiates and sets the singleton 'FeatureGates' variable with newFeatureGates(). 
diff --git a/templates/compute-domain-daemon.tmpl.yaml b/templates/compute-domain-daemon.tmpl.yaml index 4d2a602d2..143de3326 100644 --- a/templates/compute-domain-daemon.tmpl.yaml +++ b/templates/compute-domain-daemon.tmpl.yaml @@ -26,6 +26,8 @@ spec: image: {{ .ImageName }} command: ["compute-domain-daemon", "-v", "6", "run"] env: + - name: MAX_NODES_PER_IMEX_DOMAIN + value: "{{ .MaxNodesPerIMEXDomain }}" - name: NODE_NAME valueFrom: fieldRef: