From 63e8e03b5703e80791c9f8a097b77fe3b3f4190d Mon Sep 17 00:00:00 2001 From: Andrew Karpow Date: Mon, 4 May 2026 14:40:04 -0400 Subject: [PATCH] fix(akamai): add propagation timeout to prevent worker thread from hanging The Akamai driver's WorkerThread could block indefinitely waiting for GTM propagation status to leave PENDING state. This starved all other sync operations and caused the driver to appear hung. Add a configurable PropagationTimeout (default 300s) that breaks out of the polling loop if exceeded, allowing the sync loop to continue. --- internal/config/config.go | 5 +++-- internal/driver/akamai/datacenter.go | 5 +++++ internal/driver/akamai/domain.go | 5 +++++ internal/driver/akamai/geomap.go | 5 +++++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 351494b1..368e9fa8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -220,8 +220,9 @@ type AkamaiConfig struct { Domain string `yaml:"domain" description:"Traffic Management Domain to use (e.g. production.akadns.net)."` DomainType string `yaml:"domain_type" description:"Indicates the type of domain available based on your contract, defaults to autodetect. Either failover-only, static, weighted, basic, or full."` ContractId string `yaml:"contract_id" description:"Indicated the contract id to use, autodetects if only one contract is associated."` - SyncInterval int64 `yaml:"sync_interval" default:"30" description:"Sync interval for checking for pending updates"` - MemberStatusInterval int64 `yaml:"member_status_interval" default:"60" description:"Sync interval for checking for member status"` + SyncInterval int64 `yaml:"sync_interval" default:"30" description:"Sync interval for checking for pending updates"` + MemberStatusInterval int64 `yaml:"member_status_interval" default:"60" description:"Sync interval for checking for member status"` + PropagationTimeout int64 `yaml:"propagation_timeout" default:"300" description:"Maximum time in seconds to wait for Akamai propagation before giving up"` } type Audit struct { diff --git a/internal/driver/akamai/datacenter.go b/internal/driver/akamai/datacenter.go index a6a29f09..cc8b6b78 100644 --- a/internal/driver/akamai/datacenter.go +++ b/internal/driver/akamai/datacenter.go @@ -114,7 +114,12 @@ func (s *AkamaiAgent) FetchAndSyncDatacenters(datacenters []string, force bool) // Wait for status propagation var status string + deadline := time.Now().Add(time.Duration(config.Global.AkamaiConfig.PropagationTimeout) * time.Second) for ok := true; ok; ok = status == "PENDING" { + if time.Now().After(deadline) { + log.Errorf("FetchAndSyncDatacenters: propagation timeout after %ds", config.Global.AkamaiConfig.PropagationTimeout) + break + } time.Sleep(5 * time.Second) status, err = s.syncProvisioningStatus(nil) if err != nil { diff --git a/internal/driver/akamai/domain.go b/internal/driver/akamai/domain.go index e4b1ea0d..813c4b9c 100644 --- a/internal/driver/akamai/domain.go +++ b/internal/driver/akamai/domain.go @@ -107,7 +107,12 @@ func (s *AkamaiAgent) FetchAndSyncDomains(domains []string, force bool) error { // Wait for status propagation var status string + deadline := time.Now().Add(time.Duration(config.Global.AkamaiConfig.PropagationTimeout) * time.Second) for ok := true; ok; ok = status == "PENDING" { + if time.Now().After(deadline) { + log.Errorf("domainSync(%s): propagation timeout after %ds", domain.Id, config.Global.AkamaiConfig.PropagationTimeout) + break + } time.Sleep(5 * time.Second) status, err = s.syncProvisioningStatus(domain) if err != nil { diff --git a/internal/driver/akamai/geomap.go b/internal/driver/akamai/geomap.go index a7aeb533..36d38056 100644 --- a/internal/driver/akamai/geomap.go +++ b/internal/driver/akamai/geomap.go @@ -110,7 +110,12 @@ func (s *AkamaiAgent) FetchAndSyncGeomaps(geomaps []string, force bool) error { // Wait for status propagation var status string + deadline := time.Now().Add(time.Duration(config.Global.AkamaiConfig.PropagationTimeout) * time.Second) for ok := true; ok; ok = status == "PENDING" { + if time.Now().After(deadline) { + log.Errorf("FetchAndSyncGeomaps: propagation timeout after %ds", config.Global.AkamaiConfig.PropagationTimeout) + break + } time.Sleep(5 * time.Second) status, err = s.syncProvisioningStatus(nil) if err != nil {