Skip to content

Commit 03d36bf

Browse files
committed
allow from snat eip to fip eip
Signed-off-by: zbb88888 <jmdxjsjgcxy@gmail.com>
1 parent 830362a commit 03d36bf

File tree

2 files changed

+251
-9
lines changed

2 files changed

+251
-9
lines changed

dist/images/vpcnatgateway/nat-gateway.sh

Lines changed: 110 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ function show_help() {
7575
echo " dnat-del - Delete DNAT rule"
7676
echo " snat-add - Add SNAT rule"
7777
echo " snat-del - Delete SNAT rule"
78+
echo " hairpin-snat-add - Add hairpin SNAT rule for internal FIP access"
79+
echo " hairpin-snat-del - Delete hairpin SNAT rule"
7880
echo " qos-add - Add QoS rule"
7981
echo " qos-del - Delete QoS rule"
8082
echo " eip-ingress-qos-add - Add EIP ingress QoS"
@@ -162,6 +164,7 @@ function init() {
162164
$iptables_cmd -t nat -N EXCLUSIVE_SNAT # floatingIp SNAT
163165
$iptables_cmd -t nat -N SHARED_DNAT
164166
$iptables_cmd -t nat -N SHARED_SNAT
167+
$iptables_cmd -t nat -N HAIRPIN_SNAT
165168

166169
$iptables_cmd -t nat -A PREROUTING -j DNAT_FILTER
167170
$iptables_cmd -t nat -A DNAT_FILTER -j EXCLUSIVE_DNAT
@@ -170,6 +173,7 @@ function init() {
170173
$iptables_cmd -t nat -A POSTROUTING -j SNAT_FILTER
171174
$iptables_cmd -t nat -A SNAT_FILTER -j EXCLUSIVE_SNAT
172175
$iptables_cmd -t nat -A SNAT_FILTER -j SHARED_SNAT
176+
$iptables_cmd -t nat -A SNAT_FILTER -j HAIRPIN_SNAT
173177

174178
# Load IFB kernel module for ingress QoS traffic shaping
175179
# IFB (Intermediate Functional Block) is required for ingress rate limiting using HTB
@@ -280,6 +284,24 @@ function del_eip() {
280284
done
281285
}
282286

287+
# is_internal_cidr CIDR
#   Reports whether CIDR appears at the start of a route line on VPC_INTERFACE.
#   add_snat/del_snat use the answer to decide whether a SNAT rule also gets a
#   hairpin SNAT rule.
#   Args:    $1 - CIDR to look up (e.g., "10.0.1.0/24")
#   Returns: 0 when a matching route line is found,
#            1 when none is found or when no CIDR was supplied
function is_internal_cidr() {
    local wanted="$1"
    # Nothing to look up without a CIDR.
    [ -n "$wanted" ] || return 1
    # Every '.' is escaped so it only matches a literal dot; the '^' anchor and
    # the trailing space pin the match to the complete leading field,
    # e.g. "10.0.1.0/24" becomes "^10\.0\.1\.0/24 " and cannot hit "10X0Y1Z0/24".
    local route_regex="^${wanted//./\\.} "
    ip -4 route show dev "$VPC_INTERFACE" | grep -q "$route_regex" || return 1
    return 0
}
304+
283305
function add_floating_ip() {
284306
# make sure inited
285307
check_inited
@@ -316,33 +338,105 @@ function del_floating_ip() {
316338
# add_snat RULE...
#   Appends one SHARED_SNAT rule per RULE unless an identical one is already
#   present, then mirrors it into HAIRPIN_SNAT when the CIDR is internal.
#   RULE format: eip[/prefix],internalCIDR[,randomFullyOption]
function add_snat() {
    # make sure inited
    check_inited
    # One snapshot of the SHARED_SNAT lines serves every rule in this call.
    local all_shared_snat_rules
    all_shared_snat_rules=$($iptables_save_cmd | grep SHARED_SNAT)
    for rule in $@
    do
        # Split "eip,cidr,option" on commas; any "/prefix" on the EIP is dropped
        # because only the first element of the eip array is expanded below.
        arr=(${rule//,/ })
        eip=(${arr[0]//\// })
        internalCIDR=${arr[1]}
        randomFullyOption=${arr[2]}
        # Look for an existing rule with this source CIDR and exactly this EIP;
        # "(\$| )" stops 10.1.69.21 from matching 10.1.69.219.
        ruleMatch=$(grep -w -- "-s $internalCIDR" <<< "$all_shared_snat_rules" | grep -E -- "--to-source $eip(\$| )")
        # Only append when nothing matched, which keeps the call idempotent.
        [ -n "$ruleMatch" ] || exec_cmd "$iptables_cmd -t nat -A SHARED_SNAT -o $EXTERNAL_INTERFACE -s $internalCIDR -j SNAT --to-source $eip $randomFullyOption"
        # A CIDR routed via VPC_INTERFACE also gets a hairpin rule so internal
        # VMs can reach other internal VMs through a FIP.
        if is_internal_cidr "$internalCIDR"; then
            echo "SNAT cidr $internalCIDR is internal, adding hairpin SNAT with EIP $eip"
            add_hairpin_snat "$eip,$internalCIDR,$randomFullyOption"
        fi
    done
}
331362
# del_snat RULE...
#   Removes the SHARED_SNAT rule(s) matching each RULE and, for CIDRs routed
#   via VPC_INTERFACE, the corresponding hairpin SNAT rule.
#   RULE format: eip[/prefix],internalCIDR
#   Rules that are not present are skipped, so the call is idempotent.
function del_snat() {
    # make sure inited
    check_inited
    local all_shared_snat_rules matched_rule
    all_shared_snat_rules=$($iptables_save_cmd | grep SHARED_SNAT)
    for rule in $@
    do
        arr=(${rule//,/ })
        eip=(${arr[0]//\// })
        internalCIDR=${arr[1]}
        # Select saved rules with this source CIDR and exactly this EIP;
        # "(\$| )" stops 10.1.69.21 from matching 10.1.69.219.
        ruleMatch=$(echo "$all_shared_snat_rules" | grep -w -- "-s $internalCIDR" | grep -E -- "--to-source $eip(\$| )")
        if [ -n "$ruleMatch" ]; then
            # Delete every matched line on its own. If duplicates exist,
            # feeding the whole multi-line match to a single "-D" would splice
            # several rule specs into one invalid command.
            # fd 3 keeps the loop input away from whatever exec_cmd runs.
            while IFS= read -r matched_rule <&3
            do
                # The saved line minus its leading "-A " is the exact rule
                # spec (chain included) that "-D" needs.
                exec_cmd "$iptables_cmd -t nat -D ${matched_rule#-A }"
            done 3<<< "$ruleMatch"
        fi
        # Remove the corresponding hairpin SNAT rule (1:1 with SNAT).
        if is_internal_cidr "$internalCIDR"; then
            del_hairpin_snat "$eip,$internalCIDR"
        fi
    done
}
384+
385+
# Hairpin SNAT: Enables internal VM to access another internal VM's FIP
386+
# Packet flow when VM A accesses VM B's EIP:
387+
# 1. VM A (10.0.1.6) -> EIP (10.1.69.216) arrives at NAT GW
388+
# 2. DNAT translates destination to VM B's internal IP (10.0.1.11)
389+
# 3. Without hairpin SNAT, reply from VM B goes directly to VM A (same subnet),
390+
# but VM A expects reply from EIP, causing asymmetric routing failure
391+
# 4. Hairpin SNAT translates source to EIP, ensuring symmetric return path via NAT GW
392+
#
393+
# Hairpin SNAT mirrors SHARED_SNAT 1:1: each SNAT rule creates a corresponding
394+
# hairpin rule with the same EIP and --random-fully option. Multiple SNATs with
395+
# different EIPs for the same CIDR are supported (for port exhaustion mitigation).
396+
#
397+
# Rule format: eip,internalCIDR[,--random-fully]
398+
# Example: 10.1.69.219,10.0.1.0/24,--random-fully
399+
# Creates: iptables -t nat -A HAIRPIN_SNAT -s 10.0.1.0/24 -d 10.0.1.0/24 -j SNAT --to-source 10.1.69.219 --random-fully
400+
function add_hairpin_snat() {
401+
# make sure inited
402+
check_inited
403+
local all_hairpin_rules
404+
all_hairpin_rules=$($iptables_save_cmd -t nat | grep HAIRPIN_SNAT)
405+
for rule in $@
406+
do
407+
arr=(${rule//,/ })
408+
eip=(${arr[0]//\// })
409+
internalCIDR=${arr[1]}
410+
randomFullyOption=${arr[2]}
411+
412+
# Check if this exact rule already exists (idempotent)
413+
if echo "$all_hairpin_rules" | grep -w -- "-s $internalCIDR" | grep -w -- "-d $internalCIDR" | grep -qE -- "--to-source $eip(\$| )"; then
414+
echo "Hairpin SNAT rule for $internalCIDR with EIP $eip already exists, skipping"
415+
continue
416+
fi
417+
418+
exec_cmd "$iptables_cmd -t nat -A HAIRPIN_SNAT -s $internalCIDR -d $internalCIDR -j SNAT --to-source $eip $randomFullyOption"
419+
echo "Hairpin SNAT rule added: $internalCIDR -> $eip"
420+
done
421+
}
422+
423+
# Delete a hairpin SNAT rule.
424+
# Args: eip,internalCIDR (comma-separated)
425+
function del_hairpin_snat() {
426+
# make sure inited
427+
check_inited
428+
local all_hairpin_rules
429+
all_hairpin_rules=$($iptables_save_cmd -t nat | grep HAIRPIN_SNAT)
430+
for rule in $@
431+
do
432+
arr=(${rule//,/ })
433+
eip=(${arr[0]//\// })
434+
internalCIDR=${arr[1]}
435+
# check if rule exists (idempotent - skip if not found)
436+
if echo "$all_hairpin_rules" | grep -w -- "-s $internalCIDR" | grep -w -- "-d $internalCIDR" | grep -qE -- "--to-source $eip(\$| )"; then
437+
exec_cmd "$iptables_cmd -t nat -D HAIRPIN_SNAT -s $internalCIDR -d $internalCIDR -j SNAT --to-source $eip"
438+
echo "Hairpin SNAT rule deleted: $internalCIDR -> $eip"
439+
fi
346440
done
347441
}
348442

@@ -1435,6 +1529,14 @@ case $opt in
14351529
echo "snat-del $rules"
14361530
del_snat $rules
14371531
;;
1532+
hairpin-snat-add)
1533+
echo "hairpin-snat-add $rules"
1534+
add_hairpin_snat $rules
1535+
;;
1536+
hairpin-snat-del)
1537+
echo "hairpin-snat-del $rules"
1538+
del_hairpin_snat $rules
1539+
;;
14381540
floating-ip-add)
14391541
echo "floating-ip-add $rules"
14401542
add_floating_ip $rules

test/e2e/iptables-vpc-nat-gw/e2e_test.go

Lines changed: 141 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,42 @@ func verifySubnetStatusAfterEIPOperation(subnetClient *framework.SubnetClient, s
249249
}
250250
}
251251

252+
// iptablesSaveNat returns the iptables-save output from the NAT gateway pod,
253+
// using the exact same detection logic as nat-gateway.sh to determine whether
254+
// to use iptables-legacy-save or iptables-save (nft backend).
255+
func iptablesSaveNat(natGwPodName string) string {
256+
// Replicate nat-gateway.sh detection: if iptables-legacy -t nat -S INPUT 1 succeeds,
257+
// rules were written via iptables-legacy, so use iptables-legacy-save to read them.
258+
// NOTE: KubectlExec joins args with space and passes to "/bin/sh -c", so pass
259+
// the entire script as a single string to avoid double shell wrapping.
260+
cmd := []string{"if iptables-legacy -t nat -S INPUT 1 2>/dev/null; then iptables-legacy-save -t nat; else iptables-save -t nat; fi"}
261+
stdout, _, err := framework.KubectlExec(framework.KubeOvnNamespace, natGwPodName, cmd...)
262+
framework.ExpectNoError(err, "failed to exec iptables-save in NAT gateway pod %s", natGwPodName)
263+
return string(stdout)
264+
}
265+
266+
// hairpinSnatChainExists checks if the HAIRPIN_SNAT chain exists in the NAT gateway pod.
267+
// Returns false on older versions that don't support this feature.
268+
func hairpinSnatChainExists(natGwPodName string) bool {
269+
output := iptablesSaveNat(natGwPodName)
270+
return strings.Contains(output, ":HAIRPIN_SNAT") || strings.Contains(output, "-N HAIRPIN_SNAT")
271+
}
272+
273+
// hairpinSnatRuleExists checks if hairpin SNAT rule exists in the NAT gateway pod
274+
// for the given CIDR and specific EIP.
275+
// Returns true if rule exists, false otherwise (including when HAIRPIN_SNAT chain doesn't exist).
276+
func hairpinSnatRuleExists(natGwPodName, cidr, eip string) bool {
277+
output := iptablesSaveNat(natGwPodName)
278+
if !strings.Contains(output, ":HAIRPIN_SNAT") && !strings.Contains(output, "-N HAIRPIN_SNAT") {
279+
return false
280+
}
281+
282+
hairpinRulePattern := fmt.Sprintf("-A HAIRPIN_SNAT -s %s -d %s -j SNAT --to-source %s", cidr, cidr, eip)
283+
return strings.Contains(output, hairpinRulePattern)
284+
}
285+
286+
// Ordered e2e suite for the iptables VPC NAT gateway; besides the NAT cases it
287+
// verifies hairpin SNAT rules via hairpinSnatChainExists and hairpinSnatRuleExists.
252288
var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
253289
f := framework.NewDefaultFramework("iptables-vpc-nat-gw")
254290

@@ -264,6 +300,7 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
264300
var iptablesFIPClient *framework.IptablesFIPClient
265301
var iptablesSnatRuleClient *framework.IptablesSnatClient
266302
var iptablesDnatRuleClient *framework.IptablesDnatClient
303+
var podClient *framework.PodClient
267304

268305
var dockerExtNet1Network *dockernetwork.Inspect
269306
var net1NicName string
@@ -421,6 +458,7 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
421458
vpcName = "vpc-" + randomSuffix
422459
vpcNatGwName = "gw-" + randomSuffix
423460
overlaySubnetName = "overlay-subnet-" + randomSuffix
461+
podClient = f.PodClient()
424462
})
425463

426464
framework.ConformanceIt("[1] change gateway image and custom annotations", func() {
@@ -543,6 +581,81 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
543581
iptablesSnatRuleClient.DeleteSync(snatName)
544582
})
545583

584+
// Verify hairpin SNAT rule is automatically created for internal CIDR
585+
ginkgo.By("Verifying hairpin SNAT rule exists in NAT gateway pod")
586+
vpcNatGwPodName := util.GenNatGwPodName(vpcNatGwName)
587+
snatEip = iptablesEIPClient.Get(snatEipName)
588+
if !hairpinSnatChainExists(vpcNatGwPodName) {
589+
framework.Logf("HAIRPIN_SNAT chain not found, skipping hairpin SNAT verification (feature requires v1.15+)")
590+
} else {
591+
gomega.Eventually(func() bool {
592+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, snatEip.Status.IP)
593+
}, 30*time.Second, 2*time.Second).Should(gomega.BeTrue(),
594+
"Hairpin SNAT rule should be created after SNAT creation")
595+
596+
// Verify real data-path: internal pod accessing another internal pod via FIP EIP
597+
// Packet flow: client -> NAT GW (DNAT to serverIP + hairpin SNAT to EIP) -> server -> NAT GW (un-SNAT/DNAT) -> client
598+
ginkgo.By("Verifying hairpin SNAT connectivity: internal pod (SNAT EIP) accessing another internal pod (FIP EIP)")
599+
serverPodName := "server-" + randomSuffix
600+
clientPodName := "client-" + randomSuffix
601+
hairpinFipEipName := "hairpin-fip-eip-" + randomSuffix
602+
hairpinFipName := "hairpin-fip-" + randomSuffix
603+
604+
// Create server pod in overlay subnet with auto-assigned IP
605+
serverAnnotations := map[string]string{
606+
util.LogicalSwitchAnnotation: overlaySubnetName,
607+
}
608+
serverPort := "8080"
609+
serverArgs := []string{"netexec", "--http-port", serverPort}
610+
serverPod := framework.MakePod(f.Namespace.Name, serverPodName, nil, serverAnnotations, framework.AgnhostImage, nil, serverArgs)
611+
_ = podClient.CreateSync(serverPod)
612+
ginkgo.DeferCleanup(func() {
613+
ginkgo.By("Cleaning up server pod " + serverPodName)
614+
podClient.DeleteSync(serverPodName)
615+
})
616+
617+
// Get server pod's auto-assigned IP for FIP binding
618+
createdServerPod := podClient.GetPod(serverPodName)
619+
serverPodIP := createdServerPod.Annotations[util.IPAddressAnnotation]
620+
framework.ExpectNotEmpty(serverPodIP, "server pod should have an IP assigned")
621+
framework.Logf("Server pod %s has IP %s", serverPodName, serverPodIP)
622+
623+
// Create a dedicated EIP and FIP to map FIP EIP -> server pod IP
624+
hairpinFipEip := framework.MakeIptablesEIP(hairpinFipEipName, "", "", "", vpcNatGwName, "", "")
625+
_ = iptablesEIPClient.CreateSync(hairpinFipEip)
626+
ginkgo.DeferCleanup(func() {
627+
ginkgo.By("Cleaning up hairpin FIP EIP " + hairpinFipEipName)
628+
iptablesEIPClient.DeleteSync(hairpinFipEipName)
629+
})
630+
hairpinFipEip = iptablesEIPClient.Get(hairpinFipEipName)
631+
framework.ExpectNotEmpty(hairpinFipEip.Status.IP, "hairpin FIP EIP should have an IP assigned")
632+
633+
hairpinFip := framework.MakeIptablesFIPRule(hairpinFipName, hairpinFipEipName, serverPodIP)
634+
_ = iptablesFIPClient.CreateSync(hairpinFip)
635+
ginkgo.DeferCleanup(func() {
636+
ginkgo.By("Cleaning up hairpin FIP " + hairpinFipName)
637+
iptablesFIPClient.DeleteSync(hairpinFipName)
638+
})
639+
640+
// Create client pod in same subnet (uses SNAT EIP for outbound traffic)
641+
clientAnnotations := map[string]string{
642+
util.LogicalSwitchAnnotation: overlaySubnetName,
643+
}
644+
clientPod := framework.MakePod(f.Namespace.Name, clientPodName, nil, clientAnnotations, framework.AgnhostImage, nil, []string{"pause"})
645+
_ = podClient.CreateSync(clientPod)
646+
ginkgo.DeferCleanup(func() {
647+
ginkgo.By("Cleaning up client pod " + clientPodName)
648+
podClient.DeleteSync(clientPodName)
649+
})
650+
651+
// Test connectivity: client pod (SNAT EIP) -> server pod (FIP EIP)
652+
ginkgo.By("Checking connectivity from client pod (SNAT EIP) to server pod (FIP EIP) " + hairpinFipEip.Status.IP)
653+
cmd := []string{"curl", "-m", "10", fmt.Sprintf("http://%s:%s/clientip", hairpinFipEip.Status.IP, serverPort)}
654+
output, _, err := framework.KubectlExec(f.Namespace.Name, clientPodName, cmd...)
655+
framework.ExpectNoError(err, "Client pod (SNAT EIP) should reach server pod via FIP EIP through hairpin SNAT")
656+
framework.Logf("Hairpin SNAT connectivity verified, output: %s", string(output))
657+
}
658+
546659
ginkgo.By("Creating iptables vip for dnat")
547660
dnatVip := framework.MakeVip(f.Namespace.Name, dnatVipName, overlaySubnetName, "", "", "")
548661
_ = vipClient.CreateSync(dnatVip)
@@ -618,8 +731,19 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
618731
iptablesSnatRuleClient.DeleteSync(sharedEipSnatName)
619732
})
620733

621-
ginkgo.By("Get share eip")
734+
// Verify hairpin SNAT rule is created for the shared SNAT (same CIDR, different EIP).
735+
// Hairpin mirrors SNAT 1:1: each SNAT creates its own hairpin rule.
736+
ginkgo.By("Getting share eip")
622737
shareEip = iptablesEIPClient.Get(sharedEipName)
738+
framework.ExpectNotEmpty(shareEip.Status.IP, "shareEip.Status.IP should not be empty")
739+
if hairpinSnatChainExists(vpcNatGwPodName) {
740+
ginkgo.By("Verifying hairpin SNAT rule exists for the shared SNAT EIP")
741+
gomega.Eventually(func() bool {
742+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, shareEip.Status.IP)
743+
}, 30*time.Second, 2*time.Second).Should(gomega.BeTrue(),
744+
"Hairpin SNAT rule should be created for shared SNAT EIP")
745+
}
746+
623747
ginkgo.By("Get share dnat")
624748
shareDnat = iptablesDnatRuleClient.Get(sharedEipDnatName)
625749
ginkgo.By("Get share snat")
@@ -643,6 +767,22 @@ var _ = framework.OrderedDescribe("[group:iptables-vpc-nat-gw]", func() {
643767
// make sure eip is shared
644768
nats := []string{util.DnatUsingEip, util.FipUsingEip, util.SnatUsingEip}
645769
framework.ExpectEqual(shareEip.Status.Nat, strings.Join(nats, ","))
770+
771+
// Verify hairpin SNAT rule cleanup when SNAT is deleted.
772+
// Hairpin lifecycle is 1:1 with SNAT: created together, deleted together.
773+
if hairpinSnatChainExists(vpcNatGwPodName) {
774+
ginkgo.By("Deleting snat to verify hairpin SNAT rule cleanup")
775+
iptablesSnatRuleClient.DeleteSync(snatName)
776+
ginkgo.By("Verifying hairpin SNAT rule for the deleted SNAT EIP is removed")
777+
gomega.Eventually(func() bool {
778+
return hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, snatEip.Status.IP)
779+
}, 30*time.Second, 2*time.Second).Should(gomega.BeFalse(),
780+
"Hairpin SNAT rule should be deleted after SNAT deletion")
781+
ginkgo.By("Verifying hairpin SNAT rule for the shared SNAT EIP still exists")
782+
gomega.Expect(hairpinSnatRuleExists(vpcNatGwPodName, overlaySubnetV4Cidr, shareEip.Status.IP)).To(gomega.BeTrue(),
783+
"Hairpin SNAT rule for the shared SNAT EIP should NOT be affected by deleting a different SNAT")
784+
}
785+
646786
// All cleanup is handled by DeferCleanup above, no need for manual cleanup
647787
})
648788

0 commit comments

Comments
 (0)