Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# syntax=docker/dockerfile:1
FROM mcr.microsoft.com/devcontainers/go:1-1.24-bookworm

# CHANGE: Switched from debian-based Go image to Ubuntu Base
FROM mcr.microsoft.com/devcontainers/base:ubuntu-22.04

# Install kubectl
RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl \
RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
&& install -m 0755 kubectl /usr/local/bin/kubectl \
&& rm kubectl

# Install Helm
RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
RUN curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
18 changes: 11 additions & 7 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"name": "Go",
"name": "Ubuntu Go",
"dockerFile": "Dockerfile",
"runArgs": [
"--privileged",
Expand All @@ -15,9 +15,9 @@
"installOhMyZshConfig": true,
"upgradePackages": true,
"nonFreePackages": true,
"username": "automatic",
"userUid": "automatic",
"userGid": "automatic"
"username": "root",
"userUid": "0",
"userGid": "0"
},
"ghcr.io/devcontainers/features/docker-outside-of-docker:1": {
"version": "latest",
Expand All @@ -33,11 +33,15 @@
"ghcr.io/devcontainers-extra/features/zsh-plugins:0": {
"plugins": "zsh-autosuggestions zsh-syntax-highlighting kubectl docker docker-compose git-auto-fetch git-commit golang helm",
"omzPlugins": "https://github.com/zsh-users/zsh-autosuggestions https://github.com/zsh-users/zsh-syntax-highlighting",
"username": "vscode"
"username": "root"
},
"ghcr.io/devcontainers/features/go:1": {
"version": "latest"
}
},
"mounts": [
"source=${env:HOME},target=/mnt/hosthome,type=bind,consistency=cached,readonly"
],
"postStartCommand": "if [ -f /mnt/hosthome/.p10k.zsh ]; then cp /mnt/hosthome/.p10k.zsh /home/vscode/.p10k.zsh; fi && git clone --depth=1 https://github.com/romkatv/powerlevel10k.git /home/vscode/.oh-my-zsh/custom/themes/powerlevel10k && sed -i 's|^ZSH_THEME=.*|ZSH_THEME=\\\"powerlevel10k/powerlevel10k\\\"|' /home/vscode/.zshrc && echo '\nif [[ -r \"${XDG_CACHE_HOME:-$HOME/.cache}/p10k-instant-prompt-${(%):-%n}.zsh\" ]]; then\n source \"${XDG_CACHE_HOME:-$HOME/.cache}/p10k-instant-prompt-${(%):-%n}.zsh\"\nfi\n\n[[ ! -f ~/.p10k.zsh ]] || source ~/.p10k.zsh' >> /home/vscode/.zshrc && docker buildx create --name liqo-builder --use --driver-opt network=host"
}
"postStartCommand": "if [ -f /mnt/hosthome/.p10k.zsh ]; then cp /mnt/hosthome/.p10k.zsh /root/.p10k.zsh; fi && git clone --depth=1 https://github.com/romkatv/powerlevel10k.git /root/.oh-my-zsh/custom/themes/powerlevel10k && sed -i 's|^ZSH_THEME=.*|ZSH_THEME=\"powerlevel10k/powerlevel10k\"|' /root/.zshrc && echo '\nif [[ -r \"${XDG_CACHE_HOME:-$HOME/.cache}/p10k-instant-prompt-${(%):-%n}.zsh\" ]]; then\n source \"${XDG_CACHE_HOME:-$HOME/.cache}/p10k-instant-prompt-${(%):-%n}.zsh\"\nfi\n\n[[ ! -f ~/.p10k.zsh ]] || source ~/.p10k.zsh' >> /root/.zshrc && docker buildx create --name liqo-builder --use --driver-opt network=host",
"remoteUser": "root"
}
4 changes: 3 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ endif

ifeq ($(shell uname),Darwin)
SED_COMMAND=sed -i '' -n '/rules/,$$p'
SED_INPLACE=sed -i ''
else
SED_COMMAND=sed -i -n '/rules/,$$p'
SED_INPLACE=sed -i
endif

generate: generate-groups rbacs manifests fmt
Expand Down Expand Up @@ -103,7 +105,7 @@ fmt: gci addlicense docs
go fmt ./...
find . -type f -name '*.go' -a ! -name '*zz_generated*' -exec $(GCI) write -s standard -s default -s "prefix(github.com/liqotech/liqo)" {} \;
find . -type f -name '*.go' -exec $(ADDLICENSE) -l apache -c "The Liqo Authors" -y "2019-$(shell date +%Y)" {} \;
find . -type f -name "*.go" -exec sed -i "s/Copyright 2019-[0-9]\{4\} The Liqo Authors/Copyright 2019-$(shell date +%Y) The Liqo Authors/" {} +
find . -type f -name "*.go" -exec $(SED_INPLACE) "s/Copyright 2019-[0-9]\{4\} The Liqo Authors/Copyright 2019-$(shell date +%Y) The Liqo Authors/" {} +

# Install golangci-lint if not available
golangci-lint:
Expand Down
5 changes: 4 additions & 1 deletion apis/networking/v1beta1/firewall/filterrule_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ const (
// ActionSetMetaMarkFromCtMark is the action to be applied to the rule.
// It is used to set the meta mark from the conntrack mark.
ActionSetMetaMarkFromCtMark FilterAction = "metamarkfromctmark"
// ActionTCPMssClamp is the action to be applied to the rule.
// It is used to clamp the TCP MSS.
ActionTCPMssClamp FilterAction = "tcpmssclamp"
)

// FilterRule is a rule to be applied to a filter chain.
Expand All @@ -35,7 +38,7 @@ type FilterRule struct {
// They can be multiple and they are applied with an AND operator.
Match []Match `json:"match"`
// Action is the action to be applied to the rule.
// +kubebuilder:validation:Enum=ctmark;metamarkfromctmark
// +kubebuilder:validation:Enum=ctmark;metamarkfromctmark;tcpmssclamp
Action FilterAction `json:"action"`
// Value is the value to be used for the action.
Value *string `json:"value,omitempty"`
Expand Down
1 change: 1 addition & 0 deletions cmd/gateway/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ func run(cmd *cobra.Command, _ []string) error {
connoptions.GwOptions.Name,
mgr.GetEventRecorderFor("firewall-controller"),
[]labels.Set{
gateway.ForgeFirewallAllGatewaysTargetLabels(),
gateway.ForgeFirewallInternalTargetLabels(),
remapping.ForgeFirewallTargetLabels(connoptions.GwOptions.RemoteClusterID),
remapping.ForgeFirewallTargetLabelsIPMappingGw(),
Expand Down
2 changes: 2 additions & 0 deletions deployments/liqo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@
| networking.fabric.pod.priorityClassName | string | `""` | PriorityClassName (https://kubernetes.io/docs/concepts/scheduling-eviction/pod-priority-preemption/#pod-priority) for the fabric pod. |
| networking.fabric.pod.resources | object | `{"limits":{},"requests":{}}` | Resource requests and limits (https://kubernetes.io/docs/user-guide/compute-resources/) for the fabric pod. |
| networking.fabric.tolerations | list | `[]` | Extra tolerations for the fabric pod. |
| networking.gateway.mssclamp | object | `{"enabled":true,"value":0}` | Enable the TCP MSS clamping on tunnel interfaces. Tunneling technologies introduce extra overhead that reduces the MTU, causing standard-sized Internet packets to exceed the tunnel's capacity and be dropped. TCP MSS Clamping resolves this by intercepting the initial TCP connection handshake and dynamically rewriting the Maximum Segment Size (MSS) value to match the smaller available space of the tunnel interface. This dynamic adjustment, per TCP-session, forces the remote server to generate smaller data packets that fit inside the tunnel, effectively preventing fragmentation issues and the common "black hole" phenomenon where connections establish but data transfer hangs indefinitely. |
| networking.gateway.mssclamp.value | int | `0` | Set the value for the mssclamp rule. Set to 0 to use automatic value discovery based on the MTU of the tunnel interface. |
| networking.gatewayTemplates | object | `{"container":{"gateway":{"image":{"name":"ghcr.io/liqotech/gateway","version":""}},"geneve":{"image":{"name":"ghcr.io/liqotech/gateway/geneve","version":""}},"wireguard":{"image":{"name":"ghcr.io/liqotech/gateway/wireguard","version":""}}},"ping":{"interval":"2s","lossThreshold":5,"updateStatusInterval":"10s"},"replicas":1,"server":{"service":{"allocateLoadBalancerNodePorts":"","annotations":{}}},"wireguard":{"implementation":"kernel"}}` | Set the options for the default gateway (server/client) templates. The default templates use a WireGuard implementation to connect the gateway of the clusters. These options are used to configure only the default templates and should not be considered if a custom template is used. |
| networking.gatewayTemplates.container.gateway.image.name | string | `"ghcr.io/liqotech/gateway"` | Image repository for the gateway container. |
| networking.gatewayTemplates.container.gateway.image.version | string | `""` | Custom version for the gateway image. If not specified, the global tag is used. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ spec:
enum:
- ctmark
- metamarkfromctmark
- tcpmssclamp
type: string
match:
description: |-
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{- if .Values.networking.gateway.mssclamp.enabled }}
apiVersion: networking.liqo.io/v1beta1
kind: FirewallConfiguration
metadata:
name: gw-mss-clamping
labels:
networking.liqo.io/firewall-category: gateway
networking.liqo.io/firewall-subcategory: all-gateways
spec:
table:
family: IPV4
name: mss-clamping
chains:
- name: mss-clamping
type: filter
hook: forward
policy: accept
priority: 0
rules:
filterRules:
- action: tcpmssclamp
value: {{ .Values.networking.gateway.mssclamp.value | toString | quote }}
match:
- op: eq
dev:
position: out
value: liqo-tunnel
proto:
value: tcp
name: mss-clamping-out
{{- end }}
27 changes: 20 additions & 7 deletions deployments/liqo/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,20 @@ networking:
clientResources:
- apiVersion: networking.liqo.io/v1beta1
resource: wggatewayclients
gateway:
# -- Enable the TCP MSS clamping on tunnel interfaces.
# Tunneling technologies introduce extra overhead that reduces the MTU, causing standard-sized Internet
# packets to exceed the tunnel's capacity and be dropped. TCP MSS Clamping resolves this by intercepting
# the initial TCP connection handshake and dynamically rewriting the Maximum Segment Size (MSS)
# value to match the smaller available space of the tunnel interface. This dynamic adjustment, per
# TCP-session, forces the remote server to generate smaller data packets that fit inside the tunnel,
# effectively preventing fragmentation issues and the common "black hole" phenomenon where connections
# establish but data transfer hangs indefinitely.
mssclamp:
enabled: true
# -- Set the value for the mssclamp rule.
# Set to 0 to use automatic value discovery based on the MTU of the tunnel interface.
value: 0
# -- Set the options for the default gateway (server/client) templates.
# The default templates use a WireGuard implementation to connect the gateway of the clusters.
# These options are used to configure only the default templates and should not be considered
Expand Down Expand Up @@ -119,11 +133,11 @@ networking:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: liqo.io/type
operator: NotIn
values:
- virtual-node
- matchExpressions:
- key: liqo.io/type
operator: NotIn
values:
- virtual-node
# -- Extra tolerations for the fabric pod.
tolerations: []
config:
Expand Down Expand Up @@ -536,8 +550,7 @@ discovery:
clusterID: ""
# -- A set of labels that characterizes the local cluster when exposed remotely as a virtual node.
# It is suggested to specify the distinguishing characteristics that may be used to decide whether to offload pods on this cluster.
clusterLabels:
{}
clusterLabels: {}
# topology.kubernetes.io/zone: us-east-1
# liqo.io/provider: your-provider

Expand Down
9 changes: 9 additions & 0 deletions docs/advanced/peering/inter-cluster-network.md
Original file line number Diff line number Diff line change
Expand Up @@ -628,3 +628,12 @@ spec:
# Optional field - included only if .Spec.ExtraConfig is not empty
?extraConfig: "{{ .Spec.ExtraConfig }}"
```

## IP Traffic Fragmentation

Tunneling technologies, such as WireGuard (which Liqo uses to connect two clusters), introduce extra overhead that reduces the MTU, causing standard-sized Internet packets to exceed the tunnel's capacity and be dropped.
TCP MSS Clamping resolves this by intercepting the initial TCP connection handshake and dynamically rewriting the Maximum Segment Size (MSS) value to match the smaller available space of the tunnel interface.
This dynamic adjustment, which is performed for each TCP session, forces the remote server to generate smaller data packets that fit inside the inter-cluster tunnel, effectively preventing fragmentation issues and the common _black hole_ phenomenon where connections are established but data transfer hangs indefinitely.

By default, the Liqo Gateway implements TCP MSS Clamping, hence it is able to adjust the maximum size of TCP segments based on the actual MTU of the tunnel.
However, this mechanism does not work with UDP traffic: UDP packets with the IP `Don't Fragment` flag set may be dropped if their size exceeds the MTU of the tunnel.
81 changes: 81 additions & 0 deletions pkg/firewall/utils/filterule.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,10 @@ func forgeFilterRule(fr *firewallv1beta1.FilterRule, chain *nftables.Chain) (*nf
}
case firewallv1beta1.ActionSetMetaMarkFromCtMark:
applySetMetaMarkFromCtMarkAction(rule)
case firewallv1beta1.ActionTCPMssClamp:
if err := applyTCPMssClampAction(fr.Value, rule); err != nil {
return nil, fmt.Errorf("cannot apply tcpmssclamp action: %w", err)
}
default:
}
return rule, nil
Expand Down Expand Up @@ -159,3 +163,80 @@ func applySetMetaMarkFromCtMarkAction(rule *nftables.Rule) {
},
)
}

// applyTCPMssClampAction appends to rule the expressions that rewrite the MSS
// option of TCP SYN packets.
//
// value is the decimal MSS to write. When it is nil or "0", the MSS is not
// fixed: it is loaded from the route (expr.RtTCPMSS) when the rule is evaluated.
//
// It returns an error, leaving rule untouched, when value is not an integer or
// does not fit into the 2-byte MSS field written by the rule.
func applyTCPMssClampAction(value *string, rule *nftables.Rule) error {
	// The MSS is encoded below on 2 bytes: larger (or negative) values would be
	// silently truncated, e.g. 65536 would be written as 0.
	const maxMSS = 1<<16 - 1

	var size int
	if value != nil {
		parsed, err := strconv.Atoi(*value)
		if err != nil {
			return fmt.Errorf("cannot convert value to int: %w", err)
		}
		if parsed < 0 || parsed > maxMSS {
			return fmt.Errorf("invalid mss value %d: must be between 0 and %d", parsed, maxMSS)
		}
		size = parsed
	}

	rule.Exprs = append(rule.Exprs,
		// Match TCP SYN flag
		// Load TCP flags byte (offset 13 in TCP header)
		&expr.Payload{
			DestRegister: 1,
			Base:         expr.PayloadBaseTransportHeader,
			Offset:       13, // TCP flags offset
			Len:          1,
		},
		// Apply bitmask to isolate the SYN flag (0x02)
		&expr.Bitwise{
			DestRegister:   1,
			SourceRegister: 1,
			Len:            1,
			Mask:           []byte{0x02}, // SYN flag mask
			Xor:            []byte{0x00},
		},
		// Check if SYN flag is set (not equal to 0)
		&expr.Cmp{
			Op:       expr.CmpOpNeq,
			Register: 1,
			Data:     []byte{0x00},
		})

	if size == 0 {
		// Automatic mode: take the MSS from the route of the packet.
		rule.Exprs = append(rule.Exprs,
			// Load the route TCP MSS into register 1
			&expr.Rt{
				Register: 1,
				Key:      expr.RtTCPMSS,
			},
			// Convert to network byte order (host to network)
			&expr.Byteorder{
				DestRegister:   1,
				SourceRegister: 1,
				Op:             expr.ByteorderHton,
				Len:            2,
				Size:           2,
			})
	} else {
		rule.Exprs = append(rule.Exprs,
			// Load the fixed MSS value into register 1, most significant byte first
			&expr.Immediate{
				Register: 1,
				Data:     []byte{byte(size >> 8), byte(size)},
			},
		)
	}

	rule.Exprs = append(rule.Exprs,
		// Write the MSS value to TCP option maxseg
		// TCP option type 2 = MSS, offset 2, length 2 bytes
		&expr.Exthdr{
			SourceRegister: 1,
			Type:           2, // TCP option MSS
			Offset:         2, // Offset within the option
			Len:            2, // 2 bytes for MSS value
			Op:             expr.ExthdrOpTcpopt,
		},
	)

	return nil
}
12 changes: 12 additions & 0 deletions pkg/gateway/label.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ const (

// FirewallSubCategoryFabricTargetValue is the value used by the firewallconfiguration controller to reconcile only resources related to a gateway.
FirewallSubCategoryFabricTargetValue = "fabric"

// FirewallSubCategoryAllGatewaysTargetValue is the value used by the firewallconfiguration controller to reconcile a resource on every gateway.
FirewallSubCategoryAllGatewaysTargetValue = "all-gateways"
)

// ForgeActiveGatewayPodLabels returns the labels for the gateway pod.
Expand Down Expand Up @@ -88,3 +91,12 @@ func ForgeFirewallInternalTargetLabels() map[string]string {
firewall.FirewallSubCategoryTargetKey: FirewallSubCategoryFabricTargetValue,
}
}

// ForgeFirewallAllGatewaysTargetLabels builds the label set that marks a firewall
// configuration as belonging to the gateway category and to the "all-gateways"
// subcategory, i.e. a resource to be reconciled on every gateway.
func ForgeFirewallAllGatewaysTargetLabels() map[string]string {
	targetLabels := make(map[string]string, 2)
	targetLabels[firewall.FirewallCategoryTargetKey] = FirewallCategoryGwTargetValue
	targetLabels[firewall.FirewallSubCategoryTargetKey] = FirewallSubCategoryAllGatewaysTargetValue
	return targetLabels
}
32 changes: 32 additions & 0 deletions pkg/webhooks/firewallconfiguration/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ func checkRulesInChain(chain *firewallapi.Chain) error {
if err := checkUniqueRuleNames(rules); err != nil {
return forgeChainError(chain, err)
}
if err := checkFilterRules(rules); err != nil {
return forgeChainError(chain, err)
}
return nil
}

Expand Down Expand Up @@ -60,6 +63,35 @@ func checkUniqueRuleNames(rules []firewallutils.Rule) error {
return nil
}

// checkFilterRules validates each filter rule contained in rules and returns
// the first validation error found. Rules of any other kind are ignored.
func checkFilterRules(rules []firewallutils.Rule) error {
	for _, rule := range rules {
		wrapper, isFilterRule := rule.(*firewallutils.FilterRuleWrapper)
		if !isFilterRule {
			continue
		}
		if err := checkFilterRule(wrapper.FilterRule); err != nil {
			// NOTE(review): if Name is a pointer, %v prints its address rather
			// than the rule name — confirm against the FilterRule definition.
			return fmt.Errorf("rule %v is invalid: %w", wrapper.Name, err)
		}
	}
	return nil
}

// checkFilterRule runs the action-specific validation of a filter rule.
// Only the tcpmssclamp action has dedicated checks; every other action is accepted.
func checkFilterRule(r *firewallapi.FilterRule) error {
	if r.Action == firewallapi.ActionTCPMssClamp {
		return checkFilterRuleTCPMssClamp(r)
	}
	return nil
}

// checkFilterRuleTCPMssClamp verifies that a tcpmssclamp rule is restricted to
// TCP traffic: at least one of its matches must select the TCP protocol with
// the equality operation (MatchOperationEq). It returns an error otherwise.
func checkFilterRuleTCPMssClamp(r *firewallapi.FilterRule) error {
	for i := range r.Match {
		m := &r.Match[i]
		// A match is not required to carry a protocol (e.g. a device-only
		// match): skip it instead of dereferencing a nil Proto.
		if m.Proto == nil {
			continue
		}
		if m.Proto.Value == firewallapi.L4ProtoTCP && m.Op == firewallapi.MatchOperationEq {
			return nil
		}
	}
	return fmt.Errorf("tcp mss clamp rule should have a match for tcp protocol")
}

func generateRuleNames(chains []firewallapi.Chain) {
for i := range chains {
rules := firewall.FromChainToRulesArray(&chains[i])
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/cruise/network/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ var _ = Describe("Liqo E2E", func() {
RestartPods(testContext.Clusters[i].ControllerClient)
}

time.Sleep(time.Second * 60)

// Check if there is only one active gateway pod per remote cluster.
for i := range testContext.Clusters {
numActiveGateway := testContext.Clusters[i].NumPeeredConsumers + testContext.Clusters[i].NumPeeredProviders
Expand Down
Loading