Title: aggregate cluster extension failover not working as expected
Description:
To support IPv4/IPv6 dual stack, we use Envoy's aggregate cluster functionality so that traffic can go over either the IPv4 or the IPv6 network; the priority is defined through a cm-configurable option that sets the preferred IP-version network.
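For clarity, the preference is expressed by the order of the clusters list inside the aggregate cluster (the first cluster has the highest priority). Below is a trimmed excerpt of the configuration shown further down, with the IPv4-bound cluster listed first so it acts as the primary:
{
  "name": "envoy.clusters.aggregate",
  "typed_config": {
    "@type": "type.googleapis.com/envoy.config.cluster.aggregate.v2alpha.ClusterConfig",
    "clusters": [
      "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_192.168.5.14",
      "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_fc00::192:168:5:e"
    ]
  }
}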
The normal scenario works as expected: traffic goes through the cluster configured as primary in the aggregate cluster setup. However, when the primary cluster fails to connect, no traffic is routed to the secondary cluster (which would normally connect fine), and a 503 error is returned:
[debug][upstream][source/common/upstream/cluster_manager_impl.cc:1235] no healthy host for HTTP connection pool
[debug][http][source/common/http/conn_manager_impl.cc:1475] [C111][S15900032332570351549] Sending local reply with details no_healthy_upstream
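For context, traffic is directed at the aggregate cluster itself, so the failover decision is expected to happen inside the aggregate cluster. A minimal, illustrative route entry is sketched below (the match prefix is only an example, not copied from our actual route configuration):
{
  "match": { "prefix": "/" },
  "route": {
    "cluster": "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_aggregate"
  }
}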
Here is a sample of the aggregate cluster configuration:
{
  "version_info": "3",
  "cluster": {
    "@type": "type.googleapis.com/envoy.api.v2.Cluster",
    "name": "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_192.168.5.14",
    "type": "EDS",
    "eds_cluster_config": {
      "eds_config": {
        "ads": {}
      }
    },
    "connect_timeout": "5s",
    "lb_policy": "LEAST_REQUEST",
    "circuit_breakers": {
      "thresholds": [
        {
          "max_requests": 10000,
          "max_retries": 3
        },
        {
          "priority": "HIGH",
          "max_requests": 10000,
          "max_retries": 3
        }
      ]
    },
    "http2_protocol_options": {
      "hpack_table_size": 4096
    },
    "upstream_bind_config": {
      "source_address": {
        "address": "192.168.5.14",
        "port_value": 0
      },
      "socket_options": [
        {
          "name": "1",
          "int_value": "252"
        }
      ]
    },
    "transport_socket": {
      "name": "tls",
      "typed_config": {
        "@type": "type.googleapis.com/envoy.api.v2.auth.UpstreamTlsContext",
        "common_tls_context": {
          "alpn_protocols": [
            "h2"
          ]
        }
      }
    }
  },
  "last_updated": "2020-06-12T13:51:04.562Z"
},
{
  "version_info": "301",
  "cluster": {
    "@type": "type.googleapis.com/envoy.api.v2.Cluster",
    "name": "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_aggregate",
    "connect_timeout": "5s",
    "lb_policy": "CLUSTER_PROVIDED",
    "http2_protocol_options": {
      "hpack_table_size": 4096
    },
    "cluster_type": {
      "name": "envoy.clusters.aggregate",
      "typed_config": {
        "@type": "type.googleapis.com/envoy.config.cluster.aggregate.v2alpha.ClusterConfig",
        "clusters": [
          "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_192.168.5.14",
          "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_fc00::192:168:5:e"
        ]
      }
    }
  },
  "last_updated": "2020-06-16T11:48:36.453Z"
},
{
  "version_info": "3",
  "cluster": {
    "@type": "type.googleapis.com/envoy.api.v2.Cluster",
    "name": "UI-Nudm_NF-udm_instance_1_FQDN-udm01.udm.5gc.mnc03.mcc262.3gppnetwork.org_fc00::192:168:5:e",
    "type": "EDS",
    "eds_cluster_config": {
      "eds_config": {
        "ads": {}
      }
    },
    "connect_timeout": "5s",
    "lb_policy": "LEAST_REQUEST",
    "circuit_breakers": {
      "thresholds": [
        {
          "max_requests": 10000,
          "max_retries": 3
        },
        {
          "priority": "HIGH",
          "max_requests": 10000,
          "max_retries": 3
        }
      ]
    },
    "http2_protocol_options": {
      "hpack_table_size": 4096
    },
    "upstream_bind_config": {
      "source_address": {
        "address": "fc00::192:168:5:e",
        "port_value": 0
      },
      "socket_options": [
        {
          "name": "1",
          "int_value": "252"
        }
      ]
    },
    "transport_socket": {
      "name": "tls",
      "typed_config": {
        "@type": "type.googleapis.com/envoy.api.v2.auth.UpstreamTlsContext",
        "common_tls_context": {
          "alpn_protocols": [
            "h2"
          ]
        }
      }
    }
  },
  "last_updated": "2020-06-12T13:51:04.593Z"
},
Thanks!