Skip to content

Commit 23f909b

Browse files
committed
allow from snat eip to fip eip
Signed-off-by: zbb88888 <jmdxjsjgcxy@gmail.com>
1 parent b0d58bb commit 23f909b

File tree

2 files changed

+277
-9
lines changed

2 files changed

+277
-9
lines changed

dist/images/vpcnatgateway/nat-gateway.sh

Lines changed: 129 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ function show_help() {
7575
echo " dnat-del - Delete DNAT rule"
7676
echo " snat-add - Add SNAT rule"
7777
echo " snat-del - Delete SNAT rule"
78+
echo " hairpin-snat-add - Add hairpin SNAT rule for internal FIP access"
79+
echo " hairpin-snat-del - Delete hairpin SNAT rule"
7880
echo " qos-add - Add QoS rule"
7981
echo " qos-del - Delete QoS rule"
8082
echo " eip-ingress-qos-add - Add EIP ingress QoS"
@@ -162,6 +164,7 @@ function init() {
162164
$iptables_cmd -t nat -N EXCLUSIVE_SNAT # floatingIp SNAT
163165
$iptables_cmd -t nat -N SHARED_DNAT
164166
$iptables_cmd -t nat -N SHARED_SNAT
167+
$iptables_cmd -t nat -N HAIRPIN_SNAT
165168

166169
$iptables_cmd -t nat -A PREROUTING -j DNAT_FILTER
167170
$iptables_cmd -t nat -A DNAT_FILTER -j EXCLUSIVE_DNAT
@@ -170,6 +173,7 @@ function init() {
170173
$iptables_cmd -t nat -A POSTROUTING -j SNAT_FILTER
171174
$iptables_cmd -t nat -A SNAT_FILTER -j EXCLUSIVE_SNAT
172175
$iptables_cmd -t nat -A SNAT_FILTER -j SHARED_SNAT
176+
$iptables_cmd -t nat -A SNAT_FILTER -j HAIRPIN_SNAT
173177

174178
# Load IFB kernel module for ingress QoS traffic shaping
175179
# IFB (Intermediate Functional Block) is required for ingress rate limiting using HTB
@@ -280,6 +284,24 @@ function del_eip() {
280284
done
281285
}
282286

287+
# Check if the given CIDR exists in VPC_INTERFACE's routes (indicates it's an internal CIDR)
288+
# This is used to determine if hairpin SNAT is needed for a given SNAT rule
289+
# Args: $1 - CIDR to check (e.g., "10.0.1.0/24")
290+
# Returns: 0 if the CIDR is found in VPC_INTERFACE routes, 1 otherwise
291+
function is_internal_cidr() {
292+
local cidr="$1"
293+
if [ -z "$cidr" ]; then
294+
return 1
295+
fi
296+
# Escape '.' in CIDR for grep regex to avoid matching unintended characters
297+
# e.g., "10.0.1.0/24" -> "^10\.0\.1\.0/24 " matches exactly, not "10X0Y1Z0/24"
298+
local cidr_pattern="^${cidr//./\\.} "
299+
if ip -4 route show dev "$VPC_INTERFACE" | grep -q "$cidr_pattern"; then
300+
return 0
301+
fi
302+
return 1
303+
}
304+
283305
function add_floating_ip() {
284306
# make sure inited
285307
check_inited
@@ -316,33 +338,124 @@ function del_floating_ip() {
316338
function add_snat() {
317339
# make sure inited
318340
check_inited
319-
# iptables -t nat -F SHARED_SNAT
341+
local all_shared_snat_rules
342+
all_shared_snat_rules=$($iptables_save_cmd -t nat | grep SHARED_SNAT)
343+
declare -A internal_cidrs_cache
320344
for rule in $@
321345
do
322346
arr=(${rule//,/ })
323347
eip=(${arr[0]//\// })
324348
internalCIDR=${arr[1]}
325349
randomFullyOption=${arr[2]}
326-
# check if already exist
327-
$iptables_save_cmd | grep SHARED_SNAT | grep "\-s $internalCIDR" | grep "source $eip" && exit 0
328-
exec_cmd "$iptables_cmd -t nat -A SHARED_SNAT -o $EXTERNAL_INTERFACE -s $internalCIDR -j SNAT --to-source $eip $randomFullyOption"
350+
# Check whether the rule already exists; skip adding it if so (idempotent)
351+
ruleMatch=$(echo "$all_shared_snat_rules" | grep -w -- "-s $internalCIDR" | grep -E -- "--to-source $eip(\$| )")
352+
if [ -z "$ruleMatch" ]; then
353+
exec_cmd "$iptables_cmd -t nat -A SHARED_SNAT -o $EXTERNAL_INTERFACE -s $internalCIDR -j SNAT --to-source $eip $randomFullyOption"
354+
fi
355+
# Add hairpin SNAT when internalCIDR is routed via VPC_INTERFACE
356+
# This enables internal VMs to access other internal VMs via FIP
357+
if [ -z "${internal_cidrs_cache[$internalCIDR]+_}" ]; then
358+
if is_internal_cidr "$internalCIDR"; then
359+
internal_cidrs_cache[$internalCIDR]=true
360+
else
361+
internal_cidrs_cache[$internalCIDR]=false
362+
fi
363+
fi
364+
if [ "${internal_cidrs_cache[$internalCIDR]}" = true ]; then
365+
echo "SNAT cidr $internalCIDR is internal, adding hairpin SNAT with EIP $eip"
366+
add_hairpin_snat "$eip,$internalCIDR,$randomFullyOption"
367+
fi
329368
done
330369
}
331370
function del_snat() {
332371
# make sure inited
333372
check_inited
334-
# iptables -t nat -F SHARED_SNAT
373+
local all_shared_snat_rules
374+
all_shared_snat_rules=$($iptables_save_cmd -t nat | grep SHARED_SNAT)
375+
declare -A internal_cidrs_cache
335376
for rule in $@
336377
do
337378
arr=(${rule//,/ })
338379
eip=(${arr[0]//\// })
339380
internalCIDR=${arr[1]}
340381
# Look up the existing rule; it is deleted only if a match is found
341-
ruleMatch=$($iptables_save_cmd | grep SHARED_SNAT | grep "\-s $internalCIDR" | grep "source $eip")
342-
if [ "$?" -eq 0 ];then
343-
ruleMatch=$(echo $ruleMatch | sed 's/-A //')
382+
ruleMatch=$(echo "$all_shared_snat_rules" | grep -w -- "-s $internalCIDR" | grep -E -- "--to-source $eip(\$| )" | head -1)
383+
if [ -n "$ruleMatch" ]; then
384+
ruleMatch=$(echo "$ruleMatch" | sed 's/-A //')
344385
exec_cmd "$iptables_cmd -t nat -D $ruleMatch"
345386
fi
387+
# Remove the corresponding hairpin SNAT rule (1:1 with SNAT); only attempted while internalCIDR is still routed via VPC_INTERFACE.
388+
if [ -z "${internal_cidrs_cache[$internalCIDR]+_}" ]; then
389+
if is_internal_cidr "$internalCIDR"; then
390+
internal_cidrs_cache[$internalCIDR]=true
391+
else
392+
internal_cidrs_cache[$internalCIDR]=false
393+
fi
394+
fi
395+
if [ "${internal_cidrs_cache[$internalCIDR]}" = true ]; then
396+
del_hairpin_snat "$eip,$internalCIDR"
397+
fi
398+
done
399+
}
400+
401+
# Hairpin SNAT: Enables internal VM to access another internal VM's FIP
402+
# Packet flow when VM A accesses VM B's EIP:
403+
# 1. VM A (10.0.1.6) -> EIP (10.1.69.216) arrives at NAT GW
404+
# 2. DNAT translates destination to VM B's internal IP (10.0.1.11)
405+
# 3. Without hairpin SNAT, reply from VM B goes directly to VM A (same subnet),
406+
# but VM A expects the reply to come from the EIP, so the asymmetric return path breaks the connection
407+
# 4. Hairpin SNAT translates source to EIP, ensuring symmetric return path via NAT GW
408+
#
409+
# Hairpin SNAT mirrors SHARED_SNAT 1:1: each SNAT rule creates a corresponding
410+
# hairpin rule with the same EIP and --random-fully option. Multiple SNATs with
411+
# different EIPs for the same CIDR are supported (for port exhaustion mitigation).
412+
#
413+
# Rule format: eip,internalCIDR[,--random-fully]
414+
# Example: 10.1.69.219,10.0.1.0/24,--random-fully
415+
# Creates: iptables -t nat -A HAIRPIN_SNAT -s 10.0.1.0/24 -d 10.0.1.0/24 -j SNAT --to-source 10.1.69.219 --random-fully
416+
function add_hairpin_snat() {
417+
# make sure inited
418+
check_inited
419+
local all_hairpin_rules
420+
all_hairpin_rules=$($iptables_save_cmd -t nat | grep HAIRPIN_SNAT)
421+
for rule in $@
422+
do
423+
arr=(${rule//,/ })
424+
eip=(${arr[0]//\// })
425+
internalCIDR=${arr[1]}
426+
randomFullyOption=${arr[2]}
427+
428+
# Check if this exact rule already exists (idempotent)
429+
if echo "$all_hairpin_rules" | grep -w -- "-s $internalCIDR" | grep -w -- "-d $internalCIDR" | grep -qE -- "--to-source $eip(\$| )"; then
430+
echo "Hairpin SNAT rule for $internalCIDR with EIP $eip already exists, skipping"
431+
continue
432+
fi
433+
434+
exec_cmd "$iptables_cmd -t nat -A HAIRPIN_SNAT -s $internalCIDR -d $internalCIDR -j SNAT --to-source $eip $randomFullyOption"
435+
echo "Hairpin SNAT rule added: $internalCIDR -> $eip"
436+
done
437+
}
438+
439+
# Delete a hairpin SNAT rule.
440+
# Args: eip,internalCIDR (comma-separated)
441+
function del_hairpin_snat() {
442+
# make sure inited
443+
check_inited
444+
local all_hairpin_rules
445+
all_hairpin_rules=$($iptables_save_cmd -t nat | grep HAIRPIN_SNAT)
446+
for rule in $@
447+
do
448+
arr=(${rule//,/ })
449+
eip=(${arr[0]//\// })
450+
internalCIDR=${arr[1]}
451+
# Use iptables-save output to construct -D command (preserves --random-fully etc.)
452+
local ruleMatch
453+
ruleMatch=$(echo "$all_hairpin_rules" | grep -w -- "-s $internalCIDR" | grep -w -- "-d $internalCIDR" | grep -E -- "--to-source $eip(\$| )" | head -1)
454+
if [ -n "$ruleMatch" ]; then
455+
ruleMatch=$(echo "$ruleMatch" | sed 's/-A //')
456+
exec_cmd "$iptables_cmd -t nat -D $ruleMatch"
457+
echo "Hairpin SNAT rule deleted: $internalCIDR -> $eip"
458+
fi
346459
done
347460
}
348461

@@ -1435,6 +1548,14 @@ case $opt in
14351548
echo "snat-del $rules"
14361549
del_snat $rules
14371550
;;
1551+
hairpin-snat-add)
1552+
echo "hairpin-snat-add $rules"
1553+
add_hairpin_snat $rules
1554+
;;
1555+
hairpin-snat-del)
1556+
echo "hairpin-snat-del $rules"
1557+
del_hairpin_snat $rules
1558+
;;
14381559
floating-ip-add)
14391560
echo "floating-ip-add $rules"
14401561
add_floating_ip $rules

test/e2e/iptables-vpc-nat-gw/e2e_test.go

Lines changed: 148 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"encoding/json"
66
"flag"
77
"fmt"
8+
"regexp"
89
"strings"
910
"testing"
1011
"time"
@@ -249,6 +250,48 @@ func verifySubnetStatusAfterEIPOperation(subnetClient *framework.SubnetClient, s
249250
}
250251
}
251252

253+
// iptablesSaveNat returns the iptables-save output from the NAT gateway pod,
254+
// using the exact same detection logic as nat-gateway.sh to determine whether
255+
// to use iptables-legacy-save or iptables-save (nft backend).
256+
func iptablesSaveNat(natGwPodName string) string {
257+
// Replicate nat-gateway.sh detection: if iptables-legacy -t nat -S INPUT 1 succeeds,
258+
// rules were written via iptables-legacy, so use iptables-legacy-save to read them.
259+
// NOTE: KubectlExec joins args with space and passes to "/bin/sh -c", so pass
260+
// the entire script as a single string to avoid double shell wrapping.
261+
cmd := []string{"if iptables-legacy -t nat -S INPUT 1 2>/dev/null; then iptables-legacy-save -t nat; else iptables-save -t nat; fi"}
262+
stdout, _, err := framework.KubectlExec(framework.KubeOvnNamespace, natGwPodName, cmd...)
263+
framework.ExpectNoError(err, "failed to exec iptables-save in NAT gateway pod %s", natGwPodName)
264+
return string(stdout)
265+
}
266+
267+
// hairpinSnatChainExists checks if the HAIRPIN_SNAT chain exists in the NAT gateway pod.
268+
// Returns false on older versions that don't support this feature.
269+
func hairpinSnatChainExists(natGwPodName string) bool {
270+
output := iptablesSaveNat(natGwPodName)
271+
return strings.Contains(output, ":HAIRPIN_SNAT") || strings.Contains(output, "-N HAIRPIN_SNAT")
272+
}
273+
274+
// hairpinSnatRuleExists checks whether a hairpin SNAT rule exists in the NAT gateway pod
275+
// for the given CIDR and specific EIP.
276+
// Uses regex with word boundaries to prevent partial IP/CIDR matching
277+
// (e.g. EIP 10.1.69.21 must not match rule for 10.1.69.219).
278+
// Returns true if rule exists, false otherwise (including when HAIRPIN_SNAT chain doesn't exist).
279+
func hairpinSnatRuleExists(natGwPodName, cidr, eip string) bool {
280+
output := iptablesSaveNat(natGwPodName)
281+
if !strings.Contains(output, ":HAIRPIN_SNAT") && !strings.Contains(output, "-N HAIRPIN_SNAT") {
282+
return false
283+
}
284+
285+
// Use regex with \b word boundaries for precise matching, and allow
286+
// optional trailing args like --random-fully after the EIP.
287+
hairpinRulePattern := fmt.Sprintf(
288+
`-A HAIRPIN_SNAT -s \b%s\b -d \b%s\b -j SNAT --to-source \b%s\b`,
289+
regexp.QuoteMeta(cidr), regexp.QuoteMeta(cidr), regexp.QuoteMeta(eip),
290+
)
291+
re := regexp.MustCompile(hairpinRulePattern)
292+
return re.MatchString(output)
293+
}
294+
252295
var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
253296
f := framework.NewDefaultFramework("iptables-vpc-nat-gw")
254297

@@ -264,6 +307,7 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
264307
var iptablesFIPClient *framework.IptablesFIPClient
265308
var iptablesSnatRuleClient *framework.IptablesSnatClient
266309
var iptablesDnatRuleClient *framework.IptablesDnatClient
310+
var podClient *framework.PodClient
267311

268312
var dockerExtNet1Network *dockernetwork.Inspect
269313
var net1NicName string
@@ -421,6 +465,7 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
421465
vpcName = "vpc-" + randomSuffix
422466
vpcNatGwName = "gw-" + randomSuffix
423467
overlaySubnetName = "overlay-subnet-" + randomSuffix
468+
podClient = f.PodClient()
424469
})
425470

426471
framework.ConformanceIt("[1] change gateway image and custom annotations", func() {
@@ -543,6 +588,81 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
543588
iptablesSnatRuleClient.DeleteSync(snatName)
544589
})
545590

591+
// Verify that a hairpin SNAT rule is automatically created for the internal CIDR
592+
ginkgo.By("[hairpin SNAT] Verifying hairpin SNAT rule exists after SNAT creation")
593+
vpcNatGwPodName := util.GenNatGwPodName(vpcNatGwName)
594+
snatEip = iptablesEIPClient.Get(snatEipName)
595+
if !hairpinSnatChainExists(vpcNatGwPodName) {
596+
framework.Logf("HAIRPIN_SNAT chain not found, skipping hairpin SNAT verification (feature requires v1.15+)")
597+
} else {
598+
gomega.Eventually(func() bool {
599+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, snatEip.Status.IP)
600+
}, 30*time.Second, 2*time.Second).Should(gomega.BeTrue(),
601+
"Hairpin SNAT rule should be created after SNAT creation")
602+
603+
// Verify real data-path: internal pod accessing another internal pod via FIP EIP
604+
// Packet flow: client -> NAT GW (DNAT to serverIP + hairpin SNAT to EIP) -> server -> NAT GW (un-SNAT/DNAT) -> client
605+
ginkgo.By("[hairpin SNAT] Verifying data-path connectivity: internal pod accessing another internal pod via FIP EIP")
606+
serverPodName := "server-" + randomSuffix
607+
clientPodName := "client-" + randomSuffix
608+
hairpinFipEipName := "hairpin-fip-eip-" + randomSuffix
609+
hairpinFipName := "hairpin-fip-" + randomSuffix
610+
611+
// [hairpin SNAT] Create server pod in overlay subnet with auto-assigned IP
612+
serverAnnotations := map[string]string{
613+
util.LogicalSwitchAnnotation: overlaySubnetName,
614+
}
615+
serverPort := "8080"
616+
serverArgs := []string{"netexec", "--http-port", serverPort}
617+
serverPod := framework.MakePod(f.Namespace.Name, serverPodName, nil, serverAnnotations, framework.AgnhostImage, nil, serverArgs)
618+
_ = podClient.CreateSync(serverPod)
619+
ginkgo.DeferCleanup(func() {
620+
ginkgo.By("Cleaning up server pod " + serverPodName)
621+
podClient.DeleteSync(serverPodName)
622+
})
623+
624+
// Get server pod's auto-assigned IP for FIP binding
625+
createdServerPod := podClient.GetPod(serverPodName)
626+
serverPodIP := createdServerPod.Annotations[util.IPAddressAnnotation]
627+
framework.ExpectNotEmpty(serverPodIP, "server pod should have an IP assigned")
628+
framework.Logf("Server pod %s has IP %s", serverPodName, serverPodIP)
629+
630+
// [hairpin SNAT] Create a dedicated EIP and FIP for the server pod
631+
hairpinFipEip := framework.MakeIptablesEIP(hairpinFipEipName, "", "", "", vpcNatGwName, "", "")
632+
_ = iptablesEIPClient.CreateSync(hairpinFipEip)
633+
ginkgo.DeferCleanup(func() {
634+
ginkgo.By("Cleaning up hairpin FIP EIP " + hairpinFipEipName)
635+
iptablesEIPClient.DeleteSync(hairpinFipEipName)
636+
})
637+
hairpinFipEip = iptablesEIPClient.Get(hairpinFipEipName)
638+
framework.ExpectNotEmpty(hairpinFipEip.Status.IP, "hairpin FIP EIP should have an IP assigned")
639+
640+
hairpinFip := framework.MakeIptablesFIPRule(hairpinFipName, hairpinFipEipName, serverPodIP)
641+
_ = iptablesFIPClient.CreateSync(hairpinFip)
642+
ginkgo.DeferCleanup(func() {
643+
ginkgo.By("Cleaning up hairpin FIP " + hairpinFipName)
644+
iptablesFIPClient.DeleteSync(hairpinFipName)
645+
})
646+
647+
// [hairpin SNAT] Create client pod in same subnet
648+
clientAnnotations := map[string]string{
649+
util.LogicalSwitchAnnotation: overlaySubnetName,
650+
}
651+
clientPod := framework.MakePod(f.Namespace.Name, clientPodName, nil, clientAnnotations, framework.AgnhostImage, nil, []string{"pause"})
652+
_ = podClient.CreateSync(clientPod)
653+
ginkgo.DeferCleanup(func() {
654+
ginkgo.By("Cleaning up client pod " + clientPodName)
655+
podClient.DeleteSync(clientPodName)
656+
})
657+
658+
// [hairpin SNAT] Test connectivity: client -> FIP EIP -> server (same subnet)
659+
ginkgo.By("[hairpin SNAT] Checking data-path: client pod -> FIP EIP " + hairpinFipEip.Status.IP + " -> server pod")
660+
cmd := []string{"curl", "-m", "10", fmt.Sprintf("http://%s:%s/clientip", hairpinFipEip.Status.IP, serverPort)}
661+
output, _, err := framework.KubectlExec(f.Namespace.Name, clientPodName, cmd...)
662+
framework.ExpectNoError(err, "[hairpin SNAT] client pod should reach server pod via FIP EIP")
663+
framework.Logf("[hairpin SNAT] connectivity verified, response: %s", string(output))
664+
}
665+
546666
ginkgo.By("Creating iptables vip for dnat")
547667
dnatVip := framework.MakeVip(f.Namespace.Name, dnatVipName, overlaySubnetName, "", "", "")
548668
_ = vipClient.CreateSync(dnatVip)
@@ -618,8 +738,19 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
618738
iptablesSnatRuleClient.DeleteSync(sharedEipSnatName)
619739
})
620740

621-
ginkgo.By("Get share eip")
741+
// Verify hairpin SNAT rule is created for the shared SNAT (same CIDR, different EIP).
742+
// Hairpin mirrors SNAT 1:1: each SNAT creates its own hairpin rule.
743+
ginkgo.By("Getting share eip")
622744
shareEip = iptablesEIPClient.Get(sharedEipName)
745+
framework.ExpectNotEmpty(shareEip.Status.IP, "shareEip.Status.IP should not be empty")
746+
if hairpinSnatChainExists(vpcNatGwPodName) {
747+
ginkgo.By("[hairpin SNAT] Verifying hairpin SNAT rule exists for the shared SNAT EIP")
748+
gomega.Eventually(func() bool {
749+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, shareEip.Status.IP)
750+
}, 30*time.Second, 2*time.Second).Should(gomega.BeTrue(),
751+
"Hairpin SNAT rule should be created for shared SNAT EIP")
752+
}
753+
623754
ginkgo.By("Get share dnat")
624755
shareDnat = iptablesDnatRuleClient.Get(sharedEipDnatName)
625756
ginkgo.By("Get share snat")
@@ -643,6 +774,22 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
643774
// make sure eip is shared
644775
nats := []string{util.DnatUsingEip, util.FipUsingEip, util.SnatUsingEip}
645776
framework.ExpectEqual(shareEip.Status.Nat, strings.Join(nats, ","))
777+
778+
// Verify hairpin SNAT rule cleanup when SNAT is deleted.
779+
// Hairpin lifecycle is 1:1 with SNAT: created together, deleted together.
780+
if hairpinSnatChainExists(vpcNatGwPodName) {
781+
ginkgo.By("[hairpin SNAT] Deleting SNAT to verify hairpin rule cleanup")
782+
iptablesSnatRuleClient.DeleteSync(snatName)
783+
ginkgo.By("[hairpin SNAT] Verifying hairpin rule for the deleted SNAT EIP is removed")
784+
gomega.Eventually(func() bool {
785+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, snatEip.Status.IP)
786+
}, 30*time.Second, 2*time.Second).Should(gomega.BeFalse(),
787+
"Hairpin SNAT rule should be deleted after SNAT deletion")
788+
ginkgo.By("[hairpin SNAT] Verifying hairpin rule for the shared SNAT EIP still exists")
789+
gomega.Expect(hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, shareEip.Status.IP)).To(gomega.BeTrue(),
790+
"Hairpin SNAT rule for the shared SNAT EIP should NOT be affected by deleting a different SNAT")
791+
}
792+
646793
// All cleanup is handled by DeferCleanup above, no need for manual cleanup
647794
})
648795

0 commit comments

Comments
 (0)