Skip to content

Commit 8f2067b

Browse files
adrianriobo and claude committed
fix(aws): support private subnets when deploying into existing VPCs
When --vpc-id is set and the chosen AZ contains only private subnets, mapt now falls back to using any available subnet instead of failing. Machines in private subnets connect outbound only (e.g. GitLab runner registration), so inbound SSH rules, EIP allocation, load balancer creation and SSH readiness checks are all skipped automatically.

Key changes:

- existingVPCNetwork tries GetPublicSubnetIDInAZ first; on failure falls back to GetAnySubnetIDInAZ and marks IsPublic=false
- NetworkResult.IsPublic propagates to all host deploy functions; EIP and LB are only created when IsPublic is true
- ComputeRequest respects nil Eip: no AssociatePublicIpAddress, no EIP association, no LB target groups; GetHostDnsName falls back to the instance private IP when neither EIP nor LB is present
- Security groups omit all inbound rules for private subnet deploys
- GetSubnetAZsForVPC returns all AZs (public and private) since private-subnet AZs are now valid deployment targets
- GetAnySubnetIDInAZ added to data/network.go
- Spot AZ resolution hardened: describeAvailabilityZonesAllAsync falls back to the standard describe when AllAvailabilityZones:true is SCP-blocked; getPlacementScores restricts the placement score request to regions where AZ ID resolution actually succeeded

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 92d011a commit 8f2067b

9 files changed

Lines changed: 213 additions & 110 deletions

File tree

pkg/provider/aws/action/fedora/fedora.go

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
225225
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
226226
keyResources.PrivateKey.PrivateKeyPem)
227227
// Security groups
228-
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc)
228+
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc, nw.IsPublic)
229229
if err != nil {
230230
return err
231231
}
@@ -257,7 +257,7 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
257257
Eip: nw.Eip,
258258
LBTargetGroups: []int{22},
259259
}
260-
if r.spot {
260+
if r.spot && nw.IsPublic {
261261
cr.Spot = true
262262
cr.SpotPrice = *r.allocationData.SpotPrice
263263
}
@@ -281,6 +281,11 @@ func (r *fedoraRequest) deploy(ctx *pulumi.Context) error {
281281
return err
282282
}
283283
}
284+
// Skip SSH readiness check for private subnets: the machine has no inbound
285+
// connectivity and is expected to register itself outbound (e.g. GitLab runner).
286+
if !nw.IsPublic {
287+
return nil
288+
}
284289
return c.Readiness(ctx, command.CommandPing, *r.prefix, awsFedoraDedicatedID,
285290
keyResources.PrivateKey, amiUserDefault, nw.Bastion, c.Dependencies)
286291
}
@@ -301,31 +306,32 @@ func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string,
301306
return output.Write(stackResult, mCtx.GetResultsOutputPath(), results)
302307
}
303308

304-
// security group for mac machine with ingress rules for ssh and vnc
309+
// securityGroups builds the security group for the Fedora machine.
310+
// When public is true the SG allows SSH (and optional Cirrus) inbound from anywhere.
311+
// When false (private subnet, outbound-only workload) no inbound rules are added;
312+
// the default egress rule permits all outbound traffic so the runner can reach GitLab.
305313
func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
306-
vpc *ec2.Vpc) (pulumi.StringArray, error) {
307-
// ingress for ssh access from 0.0.0.0
314+
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
308315
var ingressRules []securityGroup.IngressRules
309-
sshIngressRule := securityGroup.SSH_TCP
310-
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
311-
ingressRules = []securityGroup.IngressRules{sshIngressRule}
312-
// Integration ports
313-
cirrusPort, err := cirrus.CirrusPort()
314-
if err != nil {
315-
return nil, err
316-
}
317-
if cirrusPort != nil {
318-
ingressRules = append(ingressRules,
319-
securityGroup.IngressRules{
320-
Description: fmt.Sprintf("Cirrus port for %s", awsFedoraDedicatedID),
321-
FromPort: *cirrusPort,
322-
ToPort: *cirrusPort,
323-
Protocol: "tcp",
324-
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
325-
})
316+
if public {
317+
sshIngressRule := securityGroup.SSH_TCP
318+
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
319+
ingressRules = []securityGroup.IngressRules{sshIngressRule}
320+
cirrusPort, err := cirrus.CirrusPort()
321+
if err != nil {
322+
return nil, err
323+
}
324+
if cirrusPort != nil {
325+
ingressRules = append(ingressRules,
326+
securityGroup.IngressRules{
327+
Description: fmt.Sprintf("Cirrus port for %s", awsFedoraDedicatedID),
328+
FromPort: *cirrusPort,
329+
ToPort: *cirrusPort,
330+
Protocol: "tcp",
331+
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
332+
})
333+
}
326334
}
327-
328-
// Create SG with ingress rules
329335
sg, err := securityGroup.SGRequest{
330336
Name: resourcesUtil.GetResourceName(*prefix, awsFedoraDedicatedID, "sg"),
331337
VPC: vpc,
@@ -335,7 +341,6 @@ func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
335341
if err != nil {
336342
return nil, err
337343
}
338-
// Convert to an array of IDs
339344
sgs := util.ArrayConvert([]*ec2.SecurityGroup{sg.SG},
340345
func(sg *ec2.SecurityGroup) pulumi.StringInput {
341346
return sg.ID()

pkg/provider/aws/action/rhel-ai/rhelai.go

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
247247
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
248248
keyResources.PrivateKey.PrivateKeyPem)
249249
// Security groups
250-
securityGroups, err := r.securityGroups(ctx, r.mCtx, nw.Vpc)
250+
securityGroups, err := r.securityGroups(ctx, r.mCtx, nw.Vpc, nw.IsPublic)
251251
if err != nil {
252252
return err
253253
}
@@ -269,7 +269,7 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
269269
LB: nw.LoadBalancer,
270270
Eip: nw.Eip,
271271
LBTargetGroups: r.lbTargetGroups()}
272-
if r.allocationData.SpotPrice != nil {
272+
if r.allocationData.SpotPrice != nil && nw.IsPublic {
273273
cr.Spot = true
274274
cr.SpotPrice = *r.allocationData.SpotPrice
275275
}
@@ -293,6 +293,12 @@ func (r *rhelAIRequest) deploy(ctx *pulumi.Context) error {
293293
return err
294294
}
295295
}
296+
if !nw.IsPublic {
297+
if r.autoStart {
298+
return fmt.Errorf("--auto-start requires SSH access to configure RHAIIS; private subnet deployments do not support --auto-start")
299+
}
300+
return nil
301+
}
296302
if !r.autoStart {
297303
return c.Readiness(ctx, command.CommandPing, *r.prefix, awsRHELDedicatedID,
298304
keyResources.PrivateKey, amiUserDefault, nil, c.Dependencies)
@@ -325,22 +331,22 @@ func (r *rhelAIRequest) manageResults(stackResult auto.UpResult) error {
325331
return output.Write(stackResult, r.mCtx.GetResultsOutputPath(), results)
326332
}
327333

328-
// security group for mac machine with ingress rules for ssh and vnc
329334
func (r *rhelAIRequest) securityGroups(ctx *pulumi.Context, mCtx *mc.Context,
330-
vpc *ec2.Vpc) (pulumi.StringArray, error) {
331-
// ingress for ssh access from 0.0.0.0
332-
sshIngressRule := securityGroup.SSH_TCP
333-
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
334-
ingressRules := []securityGroup.IngressRules{sshIngressRule}
335-
for _, port := range r.exposePorts {
336-
rule := securityGroup.IngressRules{
337-
Description: fmt.Sprintf("port-%d", port),
338-
FromPort: port,
339-
ToPort: port,
340-
Protocol: "tcp",
341-
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
335+
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
336+
var ingressRules []securityGroup.IngressRules
337+
if public {
338+
sshIngressRule := securityGroup.SSH_TCP
339+
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
340+
ingressRules = []securityGroup.IngressRules{sshIngressRule}
341+
for _, port := range r.exposePorts {
342+
ingressRules = append(ingressRules, securityGroup.IngressRules{
343+
Description: fmt.Sprintf("port-%d", port),
344+
FromPort: port,
345+
ToPort: port,
346+
Protocol: "tcp",
347+
CidrBlocks: infra.NETWORKING_CIDR_ANY_IPV4,
348+
})
342349
}
343-
ingressRules = append(ingressRules, rule)
344350
}
345351
// Create SG with ingress rules
346352
sg, err := securityGroup.SGRequest{

pkg/provider/aws/action/rhel/rhel.go

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ func (r *rhelRequest) deploy(ctx *pulumi.Context) error {
230230
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
231231
keyResources.PrivateKey.PrivateKeyPem)
232232
// Security groups
233-
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc)
233+
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc, nw.IsPublic)
234234
if err != nil {
235235
return err
236236
}
@@ -267,7 +267,7 @@ func (r *rhelRequest) deploy(ctx *pulumi.Context) error {
267267
LB: nw.LoadBalancer,
268268
Eip: nw.Eip,
269269
LBTargetGroups: []int{22}}
270-
if r.allocationData.SpotPrice != nil {
270+
if r.allocationData.SpotPrice != nil && nw.IsPublic {
271271
cr.Spot = true
272272
cr.SpotPrice = *r.allocationData.SpotPrice
273273
}
@@ -291,6 +291,9 @@ func (r *rhelRequest) deploy(ctx *pulumi.Context) error {
291291
return err
292292
}
293293
}
294+
if !nw.IsPublic {
295+
return nil
296+
}
294297
return c.Readiness(ctx, command.CommandCloudInitWait, *r.prefix, awsRHELDedicatedID,
295298
keyResources.PrivateKey, amiUserDefault, nw.Bastion, c.Dependencies)
296299
}
@@ -311,24 +314,23 @@ func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string,
311314
return output.Write(stackResult, mCtx.GetResultsOutputPath(), results)
312315
}
313316

314-
// security group for mac machine with ingress rules for ssh and vnc
315317
func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
316-
vpc *ec2.Vpc) (pulumi.StringArray, error) {
317-
// ingress for ssh access from 0.0.0.0
318-
sshIngressRule := securityGroup.SSH_TCP
319-
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
320-
// Create SG with ingress rules
318+
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
319+
var ingressRules []securityGroup.IngressRules
320+
if public {
321+
sshIngressRule := securityGroup.SSH_TCP
322+
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
323+
ingressRules = []securityGroup.IngressRules{sshIngressRule}
324+
}
321325
sg, err := securityGroup.SGRequest{
322-
Name: resourcesUtil.GetResourceName(*prefix, awsRHELDedicatedID, "sg"),
323-
VPC: vpc,
324-
Description: fmt.Sprintf("sg for %s", awsRHELDedicatedID),
325-
IngressRules: []securityGroup.IngressRules{
326-
sshIngressRule},
326+
Name: resourcesUtil.GetResourceName(*prefix, awsRHELDedicatedID, "sg"),
327+
VPC: vpc,
328+
Description: fmt.Sprintf("sg for %s", awsRHELDedicatedID),
329+
IngressRules: ingressRules,
327330
}.Create(ctx, mCtx)
328331
if err != nil {
329332
return nil, err
330333
}
331-
// Convert to an array of IDs
332334
sgs := util.ArrayConvert([]*ec2.SecurityGroup{sg.SG},
333335
func(sg *ec2.SecurityGroup) pulumi.StringInput {
334336
return sg.ID()

pkg/provider/aws/action/windows/windows.go

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ func (r *windowsServerRequest) deploy(ctx *pulumi.Context) error {
283283
ctx.Export(fmt.Sprintf("%s-%s", *r.prefix, outputUserPrivateKey),
284284
keyResources.PrivateKey.PrivateKeyPem)
285285
// Security groups
286-
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc)
286+
securityGroups, err := securityGroups(ctx, r.mCtx, r.prefix, nw.Vpc, nw.IsPublic)
287287
if err != nil {
288288
return err
289289
}
@@ -321,7 +321,7 @@ func (r *windowsServerRequest) deploy(ctx *pulumi.Context) error {
321321
LB: nw.LoadBalancer,
322322
Eip: nw.Eip,
323323
LBTargetGroups: []int{22, 3389}}
324-
if r.allocationData.SpotPrice != nil {
324+
if r.allocationData.SpotPrice != nil && nw.IsPublic {
325325
cr.Spot = true
326326
cr.SpotPrice = *r.allocationData.SpotPrice
327327
}
@@ -347,6 +347,9 @@ func (r *windowsServerRequest) deploy(ctx *pulumi.Context) error {
347347
return err
348348
}
349349
}
350+
if !nw.IsPublic {
351+
return nil
352+
}
350353
return c.Readiness(ctx, command.CommandPing, *r.prefix, awsWindowsDedicatedID,
351354
keyResources.PrivateKey, *r.amiUser, nw.Bastion, c.Dependencies)
352355
}
@@ -368,26 +371,25 @@ func manageResults(mCtx *mc.Context, stackResult auto.UpResult, prefix *string,
368371
return output.Write(stackResult, mCtx.GetResultsOutputPath(), results)
369372
}
370373

371-
// security group for mac machine with ingress rules for ssh and vnc
372374
func securityGroups(ctx *pulumi.Context, mCtx *mc.Context, prefix *string,
373-
vpc *ec2.Vpc) (pulumi.StringArray, error) {
374-
// ingress for ssh access from 0.0.0.0
375-
sshIngressRule := securityGroup.SSH_TCP
376-
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
377-
rdpIngressRule := securityGroup.RDP_TCP
378-
rdpIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
379-
// Create SG with ingress rules
375+
vpc *ec2.Vpc, public bool) (pulumi.StringArray, error) {
376+
var ingressRules []securityGroup.IngressRules
377+
if public {
378+
sshIngressRule := securityGroup.SSH_TCP
379+
sshIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
380+
rdpIngressRule := securityGroup.RDP_TCP
381+
rdpIngressRule.CidrBlocks = infra.NETWORKING_CIDR_ANY_IPV4
382+
ingressRules = []securityGroup.IngressRules{sshIngressRule, rdpIngressRule}
383+
}
380384
sg, err := securityGroup.SGRequest{
381-
Name: resourcesUtil.GetResourceName(*prefix, awsWindowsDedicatedID, "sg"),
382-
VPC: vpc,
383-
Description: fmt.Sprintf("sg for %s", awsWindowsDedicatedID),
384-
IngressRules: []securityGroup.IngressRules{
385-
sshIngressRule, rdpIngressRule},
385+
Name: resourcesUtil.GetResourceName(*prefix, awsWindowsDedicatedID, "sg"),
386+
VPC: vpc,
387+
Description: fmt.Sprintf("sg for %s", awsWindowsDedicatedID),
388+
IngressRules: ingressRules,
386389
}.Create(ctx, mCtx)
387390
if err != nil {
388391
return nil, err
389392
}
390-
// Convert to an array of IDs
391393
sgs := util.ArrayConvert([]*ec2.SecurityGroup{sg.SG},
392394
func(sg *ec2.SecurityGroup) pulumi.StringInput {
393395
return sg.ID()

pkg/provider/aws/data/azs.go

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ type AvailabilityZonesResult struct {
4444

4545
func describeAvailabilityZonesAllAsync(ctx context.Context, regionName string, c chan AvailabilityZonesResult) {
4646
data, err := describeAvailabilityZonesAll(ctx, regionName)
47+
if err != nil || len(data) == 0 {
48+
// AllAvailabilityZones: true may be SCP-blocked or unsupported; fall back.
49+
data, err = describeAvailabilityZones(ctx, regionName, nil)
50+
}
4751
c <- AvailabilityZonesResult{AvailabilityZones: data, Err: err}
4852
}
4953

@@ -131,10 +135,15 @@ func describeAvailabilityZonesByRegions(ctx context.Context, regions []string) m
131135
}
132136
for i := 0; i < len(regions); i++ {
133137
availabilityZonesResult := <-c
134-
if availabilityZonesResult.Err == nil {
135-
region := availabilityZonesResult.AvailabilityZones[0].RegionName
136-
result[*region] = append(result[*region], availabilityZonesResult.AvailabilityZones...)
138+
if availabilityZonesResult.Err != nil {
139+
logging.Debugf("could not describe AZs: %v", availabilityZonesResult.Err)
140+
continue
141+
}
142+
if len(availabilityZonesResult.AvailabilityZones) == 0 {
143+
continue
137144
}
145+
region := availabilityZonesResult.AvailabilityZones[0].RegionName
146+
result[*region] = append(result[*region], availabilityZonesResult.AvailabilityZones...)
138147
}
139148
close(c)
140149
return result

pkg/provider/aws/data/network.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,9 @@ func getPublicSubnets(ctx context.Context, client *ec2.Client, vpcID string) (su
9191
return
9292
}
9393

94-
// GetSubnetAZsForVPC returns the unique AZ names of all subnets that belong to the specified VPC.
94+
// GetSubnetAZsForVPC returns the unique AZ names of all subnets in the specified VPC.
95+
// Both public and private subnets are included; callers that need public-only access
96+
// should use GetPublicSubnetIDInAZ and handle the private-subnet fallback themselves.
9597
func GetSubnetAZsForVPC(ctx context.Context, region, vpcID string) ([]string, error) {
9698
cfg, err := getConfig(ctx, region)
9799
if err != nil {
@@ -123,6 +125,29 @@ func GetSubnetAZsForVPC(ctx context.Context, region, vpcID string) ([]string, er
123125
return azs, nil
124126
}
125127

128+
// GetAnySubnetIDInAZ returns the first available subnet (public or private) in the
129+
// given AZ within the specified VPC. Used as a fallback when no public subnet exists.
130+
func GetAnySubnetIDInAZ(ctx context.Context, region, vpcID, az string) (*string, error) {
131+
cfg, err := getConfig(ctx, region)
132+
if err != nil {
133+
return nil, err
134+
}
135+
client := ec2.NewFromConfig(cfg)
136+
subnetsOutput, err := client.DescribeSubnets(ctx, &ec2.DescribeSubnetsInput{
137+
Filters: []ec2types.Filter{
138+
{Name: aws.String(filterVPCID), Values: []string{vpcID}},
139+
{Name: aws.String(filterAvailabilityZone), Values: []string{az}},
140+
},
141+
})
142+
if err != nil {
143+
return nil, fmt.Errorf("failed to describe subnets in VPC %s AZ %s: %w", vpcID, az, err)
144+
}
145+
if len(subnetsOutput.Subnets) == 0 {
146+
return nil, fmt.Errorf("no subnet found in VPC %s AZ %s", vpcID, az)
147+
}
148+
return subnetsOutput.Subnets[0].SubnetId, nil
149+
}
150+
126151
// GetPublicSubnetIDInAZ returns a public subnet ID in the given AZ within the specified VPC.
127152
func GetPublicSubnetIDInAZ(ctx context.Context, region, vpcID, az string) (*string, error) {
128153
cfg, err := getConfig(ctx, region)

pkg/provider/aws/data/spot.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -462,9 +462,23 @@ type placementScoreResult struct {
462462
// skipped. Returns a map of region → AZ scores filtered to those meeting minPlacementScore.
463463
func getPlacementScores(args placementScoreArgs, regions []string) (map[string][]placementScoreResult, error) {
464464
azsByRegion := describeAvailabilityZonesByRegions(args.ctx, regions)
465+
// Restrict placement score request to regions where AZ IDs can be resolved.
466+
// If DescribeAvailabilityZones failed for a region (SCP restriction, region not
467+
// enabled, etc.) scores returned for that region cannot be matched to an AZ name.
468+
resolvedRegions := make([]string, 0, len(regions))
469+
for _, r := range regions {
470+
if _, ok := azsByRegion[r]; ok {
471+
resolvedRegions = append(resolvedRegions, r)
472+
} else {
473+
logging.Debugf("excluding region %s from spot search: AZ information unavailable", r)
474+
}
475+
}
476+
if len(resolvedRegions) == 0 {
477+
return nil, fmt.Errorf("no regions with resolvable AZ information available for spot search")
478+
}
465479
var lastErr error
466480
for _, apiRegion := range args.apiRegions {
467-
result, err := placementScoresViaRegion(apiRegion, args, regions, azsByRegion)
481+
result, err := placementScoresViaRegion(apiRegion, args, resolvedRegions, azsByRegion)
468482
if err != nil {
469483
logging.Debugf("placement score API unavailable in region %s: %v, trying next", apiRegion, err)
470484
lastErr = err

0 commit comments

Comments
 (0)