@@ -176,6 +176,8 @@ type AWSProvider struct {
176176 zoneTagFilter provider.ZoneTagFilter
177177 preferCNAME bool
178178 zonesCache * zonesListCache
179+ // queue for collecting changes to submit them in the next iteration, but after all other changes
180+ failedChangesQueue map [string ]Route53Changes
179181}
180182
181183// AWSConfig contains configuration to create a new AWS provider.
@@ -240,6 +242,7 @@ func NewAWSProvider(awsConfig AWSConfig) (*AWSProvider, error) {
240242 preferCNAME : awsConfig .PreferCNAME ,
241243 dryRun : awsConfig .DryRun ,
242244 zonesCache : & zonesListCache {duration : awsConfig .ZoneCacheDuration },
245+ failedChangesQueue : make (map [string ]Route53Changes ),
243246 }
244247
245248 return provider , nil
@@ -556,9 +559,16 @@ func (p *AWSProvider) submitChanges(ctx context.Context, changes Route53Changes,
556559 for z , cs := range changesByZone {
557560 var failedUpdate bool
558561
559- batchCs := batchChangeSet (cs , p .batchChangeSize )
562+ // group changes into new changes and into changes that failed in a previous iteration and are retried
563+ retriedChanges , newChanges := findChangesInQueue (cs , p .failedChangesQueue [z ])
564+ p .failedChangesQueue [z ] = nil
560565
566+ batchCs := append (batchChangeSet (newChanges , p .batchChangeSize ), batchChangeSet (retriedChanges , p .batchChangeSize )... )
561567 for i , b := range batchCs {
568+ if len (b ) == 0 {
569+ continue
570+ }
571+
562572 for _ , c := range b {
563573 log .Infof ("Desired change: %s %s %s [Id: %s]" , * c .Action , * c .ResourceRecordSet .Name , * c .ResourceRecordSet .Type , z )
564574 }
@@ -571,13 +581,41 @@ func (p *AWSProvider) submitChanges(ctx context.Context, changes Route53Changes,
571581 },
572582 }
573583
584+ successfulChanges := 0
585+
574586 if _ , err := p .client .ChangeResourceRecordSetsWithContext (ctx , params ); err != nil {
575- log .Errorf ("Failure in zone %s [Id: %s]" , aws .StringValue (zones [z ].Name ), z )
576- log .Error (err ) // TODO(ideahitme): consider changing the interface in cases when this error might be a concern for other components
577- failedUpdate = true
587+ log .Errorf ("Failure in zone %s [Id: %s] when submitting change batch: %v" , aws .StringValue (zones [z ].Name ), z , err )
588+
589+ changesByOwnership := groupChangesByNameAndOwnershipRelation (b )
590+
591+ if len (changesByOwnership ) > 1 {
592+ log .Debug ("Trying to submit change sets one-by-one instead" )
593+
594+ for _ , changes := range changesByOwnership {
595+ for _ , c := range changes {
596+ log .Debugf ("Desired change: %s %s %s [Id: %s]" , * c .Action , * c .ResourceRecordSet .Name , * c .ResourceRecordSet .Type , z )
597+ }
598+ params .ChangeBatch = & route53.ChangeBatch {
599+ Changes : changes .Route53Changes (),
600+ }
601+ if _ , err := p .client .ChangeResourceRecordSetsWithContext (ctx , params ); err != nil {
602+ failedUpdate = true
603+ log .Errorf ("Failed submitting change (error: %v), it will be retried in a separate change batch in the next iteration" , err )
604+ p .failedChangesQueue [z ] = append (p .failedChangesQueue [z ], changes ... )
605+ } else {
606+ successfulChanges = successfulChanges + len (changes )
607+ }
608+ }
609+ } else {
610+ failedUpdate = true
611+ }
578612 } else {
613+ successfulChanges = len (b )
614+ }
615+
616+ if successfulChanges > 0 {
579617 // z is the R53 Hosted Zone ID already as aws.StringValue
580- log .Infof ("%d record(s) in zone %s [Id: %s] were successfully updated" , len ( b ) , aws .StringValue (zones [z ].Name ), z )
618+ log .Infof ("%d record(s) in zone %s [Id: %s] were successfully updated" , successfulChanges , aws .StringValue (zones [z ].Name ), z )
581619 }
582620
583621 if i != len (batchCs )- 1 {
@@ -736,9 +774,51 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
736774 change .ResourceRecordSet .HealthCheckId = aws .String (prop .Value )
737775 }
738776
777+ if ownedRecord , ok := ep .Labels [endpoint .OwnedRecordLabelKey ]; ok {
778+ change .OwnedRecord = ownedRecord
779+ }
780+
739781 return change , dualstack
740782}
741783
784+ // searches for `changes` that are contained in `queue` and returns the `changes` separated by whether they were found in the queue (`foundChanges`) or not (`notFoundChanges`)
785+ func findChangesInQueue (changes Route53Changes , queue Route53Changes ) (foundChanges , notFoundChanges Route53Changes ) {
786+ if queue == nil {
787+ return Route53Changes {}, changes
788+ }
789+
790+ for _ , c := range changes {
791+ found := false
792+ for _ , qc := range queue {
793+ if c == qc {
794+ foundChanges = append (foundChanges , c )
795+ found = true
796+ break
797+ }
798+ }
799+ if ! found {
800+ notFoundChanges = append (notFoundChanges , c )
801+ }
802+ }
803+
804+ return
805+ }
806+
807+ // group the given changes by name and ownership relation to ensure these are always submitted in the same transaction to Route53;
808+ // grouping by name is done to always submit changes with the same name but different set identifier together,
809+ // grouping by ownership relation is done to always submit changes of records and e.g. their corresponding TXT registry records together
810+ func groupChangesByNameAndOwnershipRelation (cs Route53Changes ) map [string ]Route53Changes {
811+ changesByOwnership := make (map [string ]Route53Changes )
812+ for _ , v := range cs {
813+ key := v .OwnedRecord
814+ if key == "" {
815+ key = aws .StringValue (v .ResourceRecordSet .Name )
816+ }
817+ changesByOwnership [key ] = append (changesByOwnership [key ], v )
818+ }
819+ return changesByOwnership
820+ }
821+
742822func (p * AWSProvider ) tagsForZone (ctx context.Context , zoneID string ) (map [string ]string , error ) {
743823 response , err := p .client .ListTagsForResourceWithContext (ctx , & route53.ListTagsForResourceInput {
744824 ResourceType : aws .String ("hostedzone" ),
@@ -762,41 +842,34 @@ func batchChangeSet(cs Route53Changes, batchSize int) []Route53Changes {
762842
763843 batchChanges := make ([]Route53Changes , 0 )
764844
765- changesByName := make (map [string ]Route53Changes )
766- for _ , v := range cs {
767- changesByName [* v .ResourceRecordSet .Name ] = append (changesByName [* v .ResourceRecordSet .Name ], v )
768- }
845+ changesByOwnership := groupChangesByNameAndOwnershipRelation (cs )
769846
770847 names := make ([]string , 0 )
771- for v := range changesByName {
848+ for v := range changesByOwnership {
772849 names = append (names , v )
773850 }
774851 sort .Strings (names )
775852
776- for _ , name := range names {
777- totalChangesByName := len (changesByName [name ])
778-
779- if totalChangesByName > batchSize {
780- log .Warnf ("Total changes for %s exceeds max batch size of %d, total changes: %d" , name ,
781- batchSize , totalChangesByName )
853+ currentBatch := Route53Changes {}
854+ for k , name := range names {
855+ v := changesByOwnership [name ]
856+ if len (v ) > batchSize {
857+ log .Warnf ("Total changes for %v exceeds max batch size of %d, total changes: %d; changes will not be performed" , k , batchSize , len (v ))
782858 continue
783859 }
784860
785- var existingBatch bool
786- for i , b := range batchChanges {
787- if len (b )+ totalChangesByName <= batchSize {
788- batchChanges [i ] = append (batchChanges [i ], changesByName [name ]... )
789- existingBatch = true
790- break
791- }
792- }
793- if ! existingBatch {
794- batchChanges = append (batchChanges , changesByName [name ])
861+ if len (currentBatch )+ len (v ) > batchSize {
862+ // currentBatch would be too large if we add this changeset;
863+ // add currentBatch to batchChanges and start a new currentBatch
864+ batchChanges = append (batchChanges , sortChangesByActionNameType (currentBatch ))
865+ currentBatch = append (Route53Changes {}, v ... )
866+ } else {
867+ currentBatch = append (currentBatch , v ... )
795868 }
796869 }
797-
798- for i , batch := range batchChanges {
799- batchChanges [ i ] = sortChangesByActionNameType (batch )
870+ if len ( currentBatch ) > 0 {
871+ // add final currentBatch
872+ batchChanges = append ( batchChanges , sortChangesByActionNameType (currentBatch ) )
800873 }
801874
802875 return batchChanges
0 commit comments