Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmd/mapt/cmd/aws/hosts/fedora.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ func getFedoraCreate() *cobra.Command {
Spot: params.SpotArgs(),
Timeout: viper.GetString(params.Timeout),
Airgap: viper.IsSet(airgap),
ServiceEndpoints: params.NetworkServiceEndpoints()})
ServiceEndpoints: params.NetworkServiceEndpoints(),
VpcID: params.NetworkVpcID()})
},
}
flagSet := pflag.NewFlagSet(params.CreateCmdName, pflag.ExitOnError)
Expand Down
1 change: 1 addition & 0 deletions cmd/mapt/cmd/aws/hosts/rhel.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ func getRHELCreate() *cobra.Command {
Timeout: viper.GetString(params.Timeout),
Airgap: viper.IsSet(airgap),
ServiceEndpoints: params.NetworkServiceEndpoints(),
VpcID: params.NetworkVpcID(),
})
},
}
Expand Down
1 change: 1 addition & 0 deletions cmd/mapt/cmd/aws/hosts/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func getRHELAICreate() *cobra.Command {
AutoStart: viper.IsSet(params.RhelAIAutoStart),
VLLMExtraArgs: viper.GetString(params.RhelAIVLLMExtraArgs),
ExposePorts: viper.GetIntSlice(params.RhelAIExposePorts),
VpcID: params.NetworkVpcID(),
})
},
}
Expand Down
1 change: 1 addition & 0 deletions cmd/mapt/cmd/aws/hosts/windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func getWindowsCreate() *cobra.Command {
Airgap: viper.IsSet(airgap),
Timeout: viper.GetString(params.Timeout),
ServiceEndpoints: params.NetworkServiceEndpoints(),
VpcID: params.NetworkVpcID(),
})
},
}
Expand Down
13 changes: 12 additions & 1 deletion cmd/mapt/cmd/params/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,9 @@ const (
KindExtraPortMappingsDesc = "Additional port mappings for the Kind cluster. Value should be a JSON array of objects with containerPort, hostPort, and protocol properties. Example: '[{\"containerPort\": 8080, \"hostPort\": 8080, \"protocol\": \"TCP\"}]'"

// Network
ServiceEndpoints = "service-endpoints"
ServiceEndpoints = "service-endpoints"
VpcID = "vpc-id"
VpcIDDesc = "ID of an existing VPC to deploy the instance into. When set, airgap is not supported and spot search is restricted to AZs with subnets in that VPC."

// Spot
spot = "spot"
Expand All @@ -222,12 +224,21 @@ const (

// AddNetworkFlags registers the network related flags on fs: the
// service-endpoints string slice flag, whose help text is desc, and the
// vpc-id string flag, whose help text is VpcIDDesc. Neither flag has a
// shorthand, and both default to an empty value.
func AddNetworkFlags(fs *pflag.FlagSet, desc string) {
	fs.StringSlice(ServiceEndpoints, []string{}, desc)
	fs.String(VpcID, "", VpcIDDesc)
}

// NetworkServiceEndpoints returns the value viper holds for the
// service-endpoints key as a string slice.
func NetworkServiceEndpoints() []string {
	endpoints := viper.GetStringSlice(ServiceEndpoints)
	return endpoints
}

// NetworkVpcID returns the VPC ID given through the vpc-id flag, or nil when
// no VPC ID was provided.
//
// An empty value is treated the same as an absent flag. Relying on
// viper.IsSet alone is not enough: it reports true for a flag that was passed
// explicitly even when its value is "" (e.g. --vpc-id="$VPC_ID" with an unset
// variable), which would hand callers a non-nil pointer to an empty ID and
// make them take the existing-VPC path with an invalid identifier.
func NetworkVpcID() *string {
	if v := viper.GetString(VpcID); v != "" {
		return &v
	}
	return nil
}
Comment on lines +234 to +240

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟡 Minor | ⚡ Quick win

Empty-string --vpc-id treated as “set”.

viper.IsSet returns true once the flag is Changed, even if the value is "" (e.g. --vpc-id="$VPC_ID" with an unset env var in a CI script). NetworkVpcID() then returns a pointer to "" instead of nil, and downstream (NetworkArgs.VpcID != nil) will route into existingVPCNetwork and call ec2.GetVpc with an empty ID, failing deep in the Pulumi provider instead of failing fast with a clear message.

🔧 Proposed fix
 func NetworkVpcID() *string {
-	if viper.IsSet(VpcID) {
-		v := viper.GetString(VpcID)
-		return &v
-	}
-	return nil
+	if v := viper.GetString(VpcID); v != "" {
+		return &v
+	}
+	return nil
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
func NetworkVpcID() *string {
if viper.IsSet(VpcID) {
v := viper.GetString(VpcID)
return &v
}
return nil
}
func NetworkVpcID() *string {
if v := viper.GetString(VpcID); v != "" {
return &v
}
return nil
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@cmd/mapt/cmd/params/params.go` around lines 234 - 240, `NetworkVpcID()`
currently treats an empty `--vpc-id` as present because it relies on
`viper.IsSet`, which can return true for a changed-but-empty flag. Update the
`NetworkVpcID` helper in `params.go` to return nil when the resolved VPC ID is
empty (or whitespace) so `NetworkArgs.VpcID` does not route into
`existingVPCNetwork` with an invalid ID. Use the `VpcID` flag lookup in
`NetworkVpcID` as the single place to normalize this behavior before downstream
calls like `ec2.GetVpc`.


func AddSpotFlags(fs *pflag.FlagSet) {
fs.Bool(spot, false, spotDesc)
fs.StringP(spotTolerance, "", spotToleranceDefault, spotToleranceDesc)
Expand Down
95 changes: 54 additions & 41 deletions pkg/provider/aws/action/fedora/fedora.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,30 @@ import (
)

type FedoraArgs struct {
Prefix string
Version string
Arch string
ComputeRequest *cr.ComputeRequestArgs
Spot *spotTypes.SpotArgs
Airgap bool
Prefix string
Version string
Arch string
ComputeRequest *cr.ComputeRequestArgs
Spot *spotTypes.SpotArgs
Airgap bool
ServiceEndpoints []string
VpcID *string
// If timeout is set a serverless scheduled task will be created to self destroy the resources
Timeout string
}

type fedoraRequest struct {
mCtx *mc.Context
prefix *string
version *string
arch *string
spot bool
timeout *string
mCtx *mc.Context
prefix *string
version *string
arch *string
spot bool
timeout *string
serviceEndpoints []string
allocationData *allocation.AllocationResult
airgap *bool
diskSize *int
vpcID *string
allocationData *allocation.AllocationResult
airgap *bool
diskSize *int
// internal management
// For airgap scenario there is an orchestration of
// a phase with connectivity on the machine (allowing bootstrapping)
Expand All @@ -81,6 +83,9 @@ func Create(mCtxArgs *mc.ContextArgs, args *FedoraArgs) (err error) {
return err
}
// Compose request
if args.VpcID != nil && args.Airgap {
return fmt.Errorf("--vpc-id and --airgap are mutually exclusive")
}
prefix := util.If(len(args.Prefix) > 0, args.Prefix, "main")
r := fedoraRequest{
mCtx: mCtx,
Expand All @@ -89,6 +94,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *FedoraArgs) (err error) {
arch: &args.Arch,
timeout: &args.Timeout,
serviceEndpoints: args.ServiceEndpoints,
vpcID: args.VpcID,
airgap: &args.Airgap,
diskSize: args.ComputeRequest.DiskSize}
if args.Spot != nil {
Expand All @@ -100,6 +106,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *FedoraArgs) (err error) {
ComputeRequest: args.ComputeRequest,
AMIProductDescription: &amiProduct,
Spot: args.Spot,
VpcID: args.VpcID,
})
if err != nil {
return err
Expand Down Expand Up @@ -201,7 +208,8 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
CreateLoadBalancer: r.spot,
Airgap: *r.airgap,
AirgapPhaseConnectivity: r.airgapPhaseConnectivity,
ServiceEndpoints: r.serviceEndpoints,
ServiceEndpoints: r.serviceEndpoints,
VpcID: r.vpcID,
})
if err != nil {
return err
Expand All @@ -217,7 +225,7 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
keyResources.PrivateKey.PrivateKeyPem)
// Security groups
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc)
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc, nw.IsPublic)
if err != nil {
return err
}
Expand Down Expand Up @@ -249,7 +257,7 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
Eip: nw.Eip,
LBTargetGroups: []int{22},
}
if r.spot {
if r.spot && nw.IsPublic {
cr.Spot = true
cr.SpotPrice = *r.allocationData.SpotPrice
}
Expand All @@ -273,6 +281,11 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
return err
}
}
// Skip SSH readiness check for private subnets: the machine has no inbound
// connectivity and is expected to register itself outbound (e.g. GitLab runner).
if !nw.IsPublic {
return nil
}
return c.Readiness(ctx, command.CommandPing, *r.prefix, awsFedoraDedicatedID,
keyResources.PrivateKey, amiUserDefault, nw.Bastion, c.Dependencies)
}
Expand All @@ -293,31 +306,32 @@ func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string,
return output.Write(stackResult, mCtx.GetResultsOutputPath(), results)
}

// security group for mac machine with ingress rules for ssh and vnc
// securityGroups builds the security group for the Fedora machine.
// When public is true the SG allows SSH (and optional Cirrus) inbound from anywhere.
// When false (private subnet, outbound-only workload) no inbound rules are added;
// the default egress rule permits all outbound traffic so the runner can reach GitLab.
func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
vpc *ec2.Vpc) (pulumi.StringArray, error) {
// ingress for ssh access from 0.0.0.0
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
var ingressRules []securityGroup.IngressRules
sshIngressRule := securityGroup.SSH_TCP
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
ingressRules = []securityGroup.IngressRules{sshIngressRule}
// Integration ports
cirrusPort, err := cirrus.CirrusPort()
if err != nil {
return nil, err
}
if cirrusPort != nil {
ingressRules = append(ingressRules,
securityGroup.IngressRules{
Description: fmt.Sprintf("Cirrus port for %s", awsFedoraDedicatedID),
FromPort: *cirrusPort,
ToPort: *cirrusPort,
Protocol: "tcp",
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
})
if public {
sshIngressRule := securityGroup.SSH_TCP
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
ingressRules = []securityGroup.IngressRules{sshIngressRule}
cirrusPort, err := cirrus.CirrusPort()
if err != nil {
return nil, err
}
if cirrusPort != nil {
ingressRules = append(ingressRules,
securityGroup.IngressRules{
Description: fmt.Sprintf("Cirrus port for %s", awsFedoraDedicatedID),
FromPort: *cirrusPort,
ToPort: *cirrusPort,
Protocol: "tcp",
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
})
}
}

// Create SG with ingress rules
sg, err := securityGroup.SGRequest{
Name: resourcesUtil.GetResourceName(*prefix, awsFedoraDedicatedID, "sg"),
VPC: vpc,
Expand All @@ -327,7 +341,6 @@ func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
if err != nil {
return nil, err
}
// Convert to an array of IDs
sgs := util.ArrayConvert([]*ec2.SecurityGroup{sg.SG},
func(sg *ec2.SecurityGroup) pulumi.StringInput {
return sg.ID()
Expand Down
42 changes: 26 additions & 16 deletions pkg/provider/aws/action/rhel-ai/rhelai.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type rhelAIRequest struct {
spot bool
timeout *string
serviceEndpoints []string
vpcID *string
allocationData *allocation.AllocationResult
diskSize *int
model *string
Expand Down Expand Up @@ -81,6 +82,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
arch: &args.Arch,
timeout: &args.Timeout,
serviceEndpoints: args.ServiceEndpoints,
vpcID: args.VpcID,
diskSize: args.ComputeRequest.DiskSize,
model: &args.Model,
hfToken: &args.HFToken,
Expand All @@ -97,6 +99,7 @@ func Create(mCtxArgs *mc.ContextArgs, args *apiRHELAI.RHELAIArgs) (err error) {
ComputeRequest: args.ComputeRequest,
AMIProductDescription: &amiProduct,
Spot: args.Spot,
VpcID: args.VpcID,
})
if err != nil {
return err
Expand Down Expand Up @@ -228,6 +231,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
AZ: *r.allocationData.AZ,
CreateLoadBalancer: r.allocationData.SpotPrice != nil,
ServiceEndpoints: r.serviceEndpoints,
VpcID: r.vpcID,
})
if err != nil {
return err
Expand All @@ -243,7 +247,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
keyResources.PrivateKey.PrivateKeyPem)
// Security groups
securityGroups, err := r.securityGroups(ctx, r.mCtx, nw.Vpc)
securityGroups, err := r.securityGroups(ctx, r.mCtx, nw.Vpc, nw.IsPublic)
if err != nil {
return err
}
Expand All @@ -265,7 +269,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
LB: nw.LoadBalancer,
Eip: nw.Eip,
LBTargetGroups: r.lbTargetGroups()}
if r.allocationData.SpotPrice != nil {
if r.allocationData.SpotPrice != nil && nw.IsPublic {
cr.Spot = true
cr.SpotPrice = *r.allocationData.SpotPrice
}
Expand All @@ -289,6 +293,12 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
return err
}
}
if !nw.IsPublic {
if r.autoStart {
return fmt.Errorf("--auto-start requires SSH access to configure RHAIIS; private subnet deployments do not support --auto-start")
}
return nil
}
Comment on lines +296 to +301

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🎯 Functional Correctness | 🟠 Major | ⚡ Quick win

Don’t report success when private RHEL AI cannot run auto-start setup.

This returns before rhaiisSetupScript() runs, so autoStart private-subnet deployments can succeed without configuring the service. Return an explicit error or move setup to an outbound-compatible mechanism.

Proposed guard
 	if !nw.IsPublic {
+		if r.autoStart {
+			return fmt.Errorf("auto-start is not supported in private subnets without SSH connectivity")
+		}
 		return nil
 	}
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if !nw.IsPublic {
return nil
}
if !nw.IsPublic {
if r.autoStart {
return fmt.Errorf("auto-start is not supported in private subnets without SSH connectivity")
}
return nil
}
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@pkg/provider/aws/action/rhel-ai/rhelai.go` around lines 296 - 298, The early
return in the RHEL AI networking path is incorrectly treating private-subnet
deployments as successful even though `rhaiisSetupScript()` is skipped. Update
the logic around the `autoStart` flow in the RHEL AI action so private
deployments either fail with an explicit error or use an alternate
outbound-compatible setup path before proceeding. Make sure the fix is applied
in the same control flow that checks `nw.IsPublic`, and verify
`rhaiisSetupScript` still runs whenever required for successful service
configuration.

if !r.autoStart {
return c.Readiness(ctx, command.CommandPing, *r.prefix, awsRHELDedicatedID,
keyResources.PrivateKey, amiUserDefault, nil, c.Dependencies)
Expand Down Expand Up @@ -321,22 +331,22 @@ func (r *rhelAIRequest) manageResults(stackResult auto.UpResult) error {
return output.Write(stackResult, r.mCtx.GetResultsOutputPath(), results)
}

// security group for mac machine with ingress rules for ssh and vnc
func (r *rhelAIRequest) securityGroups(ctx *pulumi.Context, mCtx *mc.Context,
vpc *ec2.Vpc) (pulumi.StringArray, error) {
// ingress for ssh access from 0.0.0.0
sshIngressRule := securityGroup.SSH_TCP
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
ingressRules := []securityGroup.IngressRules{sshIngressRule}
for _, port := range r.exposePorts {
rule := securityGroup.IngressRules{
Description: fmt.Sprintf("port-%d", port),
FromPort: port,
ToPort: port,
Protocol: "tcp",
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
var ingressRules []securityGroup.IngressRules
if public {
sshIngressRule := securityGroup.SSH_TCP
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
ingressRules = []securityGroup.IngressRules{sshIngressRule}
for _, port := range r.exposePorts {
ingressRules = append(ingressRules, securityGroup.IngressRules{
Description: fmt.Sprintf("port-%d", port),
FromPort: port,
ToPort: port,
Protocol: "tcp",
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
})
}
ingressRules = append(ingressRules, rule)
}
// Create SG with ingress rules
sg, err := securityGroup.SGRequest{
Expand Down
Loading