Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions .github/workflows/e2e-kernel-mode.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
name: E2E Kernel Mode
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
e2e-kernel:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
go-version-file: go.mod

# Writes the kind cluster configuration to /tmp/kind-config.yaml: one
# control-plane node and two workers, plus a containerd patch that mirrors
# "localhost:5000" to http://kind-registry:5000.
- name: Write kind config
run: |
cat > /tmp/kind-config.yaml <<'EOF'
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
containerdConfigPatches:
- |-
[plugins."io.containerd.grpc.v1.cri".registry.mirrors."localhost:5000"]
endpoint = ["http://kind-registry:5000"]
nodes:
- role: control-plane
- role: worker
- role: worker
EOF

# Starts a detached registry:2 container named "kind-registry" and publishes
# its port 5000 on host port 5000.
- name: Start local registry
run: |
docker run -d --restart=always -p 5000:5000 --name kind-registry registry:2

# Creates the kind cluster "kernel-mode-test" from /tmp/kind-config.yaml using
# the kindest/node:v1.32.0 node image.
- name: Create kind cluster
uses: helm/kind-action@v1
with:
cluster_name: kernel-mode-test
node_image: kindest/node:v1.32.0
config: /tmp/kind-config.yaml

# Attaches the "kind-registry" container to the "kind" docker network so the
# http://kind-registry:5000 mirror endpoint from the kind config is reachable
# by that name from the node containers.
- name: Connect registry to kind network
  run: |
    # Connect only when the registry is not attached yet. Unlike a blanket
    # "|| true", a missing network or container still fails the step.
    if [ "$(docker inspect -f '{{json .NetworkSettings.Networks.kind}}' kind-registry)" = 'null' ]; then
      docker network connect kind kind-registry
    fi

# Creates two extra bridge networks and connects every node container of the
# "kernel-mode-test" cluster to both of them.
# NOTE(review): presumably these appear as eth1/eth2 inside the nodes (the test
# step sets FIRST_SECONDARY_NIC/SECOND_SECONDARY_NIC to those names) — confirm.
- name: Create secondary docker networks and attach to kind nodes
run: |
docker network create --driver bridge kind-secondary-1
docker network create --driver bridge kind-secondary-2
for node in $(kind get nodes --name kernel-mode-test); do
docker network connect kind-secondary-1 "$node"
docker network connect kind-secondary-2 "$node"
done

# For every node container of the "kernel-mode-test" cluster: loads the 8021q,
# bonding and openvswitch kernel modules, then installs and starts
# openvswitch-switch inside the container.
- name: Setup kind nodes (kernel modules, OVS)
  run: |
    for node in $(kind get nodes --name kernel-mode-test); do
      docker exec "$node" bash -c "modprobe 8021q && modprobe bonding && modprobe openvswitch"
      # Only stdout of the install is discarded; stderr stays in the job log
      # so a failed package installation can be diagnosed.
      docker exec "$node" bash -c "apt-get update -qq && apt-get install -y -qq openvswitch-switch > /dev/null && systemctl start openvswitch-switch"
    done

# Sets the node-role.kubernetes.io/worker label (empty value) on every node
# whose name contains "worker"; --overwrite keeps the step re-runnable.
- name: Label kind worker nodes
run: |
for node in $(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name | grep worker); do
kubectl label node "$node" node-role.kubernetes.io/worker="" --overwrite
done

# Runs `make cluster-up` followed by `make cluster-sync` with the "external"
# provider, docker as image builder and localhost:5000 as image registry.
# NOTE(review): HANDLER_EXTRA_PARAMS presumably makes the handler image build
# use build/install-nmstate.packit.sh (NMSTATE_SOURCE=packit) — confirm.
- name: Deploy with cluster-sync
run: |
make cluster-up
make cluster-sync
env:
KUBEVIRT_PROVIDER: external
KUBECONFIG: /home/runner/.kube/config
DEV_IMAGE_REGISTRY: localhost:5000
IMAGE_BUILDER: docker
KUBEVIRT_NUM_NODES: 3
HANDLER_EXTRA_PARAMS: "--build-arg NMSTATE_SOURCE=packit"

# Runs the `test-e2e-handler-kernel` make target against the external cluster,
# naming eth0 as the primary NIC and eth1/eth2 as the secondary NICs.
# NOTE(review): SSH=/bin/true presumably stubs out node SSH commands — confirm.
- name: Run kernel mode e2e tests
run: make test-e2e-handler-kernel
env:
KUBEVIRT_PROVIDER: external
KUBECONFIG: /home/runner/.kube/config
SSH: /bin/true
PRIMARY_NIC: eth0
FIRST_SECONDARY_NIC: eth1
SECOND_SECONDARY_NIC: eth2

# Uploads test_logs/ as the "kernel-mode-test-logs" artifact. Runs even when an
# earlier step failed (if: always()); an absent directory is not an error.
- name: Upload test logs
if: always()
uses: actions/upload-artifact@v4
with:
name: kernel-mode-test-logs
path: test_logs/
if-no-files-found: ignore
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,19 @@ test-reporter:
go run . --dry-run --fake=failure && \
go run . --dry-run --fake=stale

# Features supported in kernel mode (no NetworkManager), written as a label
# filter expression that test-e2e-handler-kernel passes to $(GINKGO) via
# --label-filter. Assigned with `?=`, so a value already set by the caller wins.
# NodeSSH excluded: requires SSH to nodes (nmcli).
# VLAN creation works but assigning static IP on VLAN fails (nispor limitation).
KERNEL_MODE_FEATURES ?= (Nodes && !NodeSSH) || NNSDependencies || NNSFilter || \
(NNSTimestamp && !NodeSSH)

test-e2e-handler:
KUBECONFIG=$(KUBECONFIG) OPERATOR_NAMESPACE=$(OPERATOR_NAMESPACE) MONITORING_NAMESPACE=$(MONITORING_NAMESPACE) $(GINKGO) $(e2e_test_args) ./test/e2e/handler ...

# Runs the handler e2e suite (./test/e2e/handler) like test-e2e-handler, but
# restricted to the specs selected by the KERNEL_MODE_FEATURES label filter.
test-e2e-handler-kernel:
KUBECONFIG=$(KUBECONFIG) OPERATOR_NAMESPACE=$(OPERATOR_NAMESPACE) MONITORING_NAMESPACE=$(MONITORING_NAMESPACE) \
$(GINKGO) $(e2e_test_args) --label-filter="$(KERNEL_MODE_FEATURES)" ./test/e2e/handler ...

test-e2e-operator: manifests
KUBECONFIG=$(KUBECONFIG) OPERATOR_NAMESPACE=$(OPERATOR_NAMESPACE) MONITORING_NAMESPACE=$(MONITORING_NAMESPACE) $(GINKGO) $(e2e_test_args) ./test/e2e/operator ...

Expand Down Expand Up @@ -286,6 +296,7 @@ olm-push: bundle-push index-push
check-gen \
operator-sdk \
test-e2e-handler \
test-e2e-handler-kernel \
test-e2e-operator \
test-e2e \
test-reporter\
Expand Down
5 changes: 5 additions & 0 deletions build/install-nmstate.packit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Installs nmstate from the packit COPR repository packit/nmstate-nmstate-3104.
#
# Tracing and exit-on-error are enabled with `set` rather than on the shebang
# line so they also apply when the script is started as
# `bash install-nmstate.packit.sh`, where shebang options are ignored.
set -xe

# dnf-plugins-core provides the `dnf copr` subcommand used below.
dnf install -b -y dnf-plugins-core
dnf copr enable -y packit/nmstate-nmstate-3104
dnf install -b -y nmstate
37 changes: 21 additions & 16 deletions cluster/up.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,31 @@ set -ex

source ./cluster/kubevirtci.sh
kubevirtci::install
source ./cluster/sync-common.sh

$(kubevirtci::path)/cluster-up/up.sh

# On okd-*/ocp-* providers, poll once per second until the API server answers
# a query for SecurityContextConstraints.
# This block previously carried Makefile escapes: "$$" expands to the shell PID
# in bash (so the pattern never matched as intended) and "$(KUBECTL)" tried to
# run a command named KUBECTL. It now uses a plain end-of-string anchor and the
# same ./cluster/kubectl.sh wrapper the rest of this script uses.
if [[ "$KUBEVIRT_PROVIDER" =~ ^(okd|ocp)-.*$ ]]; then
    while ! ./cluster/kubectl.sh get securitycontextconstraints; do
        sleep 1
    done
fi

echo 'Upgrading NetworkManager and enabling and starting up openvswitch'
for node in $(./cluster/kubectl.sh get nodes --no-headers | awk '{print $1}'); do
if [[ "$NM_VERSION" == "latest" ]]; then
echo "Installing NetworkManager from copr networkmanager/NetworkManager-main"
./cluster/cli.sh ssh ${node} -- sudo dnf install -y dnf-plugins-core
./cluster/cli.sh ssh ${node} -- sudo dnf copr enable -y networkmanager/NetworkManager-main
fi
./cluster/cli.sh ssh ${node} -- sudo dnf upgrade -y NetworkManager --allowerasing
./cluster/cli.sh ssh ${node} -- sudo systemctl daemon-reload
./cluster/cli.sh ssh ${node} -- sudo systemctl enable openvswitch
./cluster/cli.sh ssh ${node} -- sudo systemctl restart openvswitch
# Newer kubevirtci has dhclient installed so we should enforce not using it to
# keep using the NM internal DHCP client as we always have
./cluster/cli.sh ssh ${node} -- sudo rm -f /etc/NetworkManager/conf.d/002-dhclient.conf
./cluster/cli.sh ssh ${node} -- sudo systemctl restart NetworkManager
done
if ! isExternal; then
    # Runs the remaining arguments with sudo on the node named by $1, over ssh.
    run_on_node() {
        local target=$1
        shift
        ./cluster/cli.sh ssh ${target} -- sudo "$@"
    }

    echo 'Upgrading NetworkManager and enabling and starting up openvswitch'
    for node in $(./cluster/kubectl.sh get nodes --no-headers | awk '{print $1}'); do
        # NM_VERSION=latest switches the node to the NetworkManager-main copr
        # before the upgrade below.
        if [[ "$NM_VERSION" == "latest" ]]; then
            echo "Installing NetworkManager from copr networkmanager/NetworkManager-main"
            run_on_node ${node} dnf install -y dnf-plugins-core
            run_on_node ${node} dnf copr enable -y networkmanager/NetworkManager-main
        fi
        run_on_node ${node} dnf upgrade -y NetworkManager --allowerasing
        run_on_node ${node} systemctl daemon-reload
        run_on_node ${node} systemctl enable openvswitch
        run_on_node ${node} systemctl restart openvswitch
        # Newer kubevirtci has dhclient installed so we should enforce not using it to
        # keep using the NM internal DHCP client as we always have
        run_on_node ${node} rm -f /etc/NetworkManager/conf.d/002-dhclient.conf
        run_on_node ${node} systemctl restart NetworkManager
    done
else
    echo 'Skipping NetworkManager/OVS setup for external provider'
fi
21 changes: 21 additions & 0 deletions cmd/handler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import (
"github.com/nmstate/kubernetes-nmstate/pkg/file"
nmstatelog "github.com/nmstate/kubernetes-nmstate/pkg/log"
"github.com/nmstate/kubernetes-nmstate/pkg/monitoring"
"github.com/nmstate/kubernetes-nmstate/pkg/nm"
"github.com/nmstate/kubernetes-nmstate/pkg/nmstatectl"
"github.com/nmstate/kubernetes-nmstate/pkg/webhook"
)
Expand Down Expand Up @@ -224,9 +225,29 @@ func setupWebhookEnvironment(mgr manager.Manager) error {
return nil
}

// detectAndSetKernelMode probes NetworkManager through nm.Version and switches
// nmstatectl to kernel mode when that probe returns an error. The outcome is
// logged in both cases.
func detectAndSetKernelMode() {
	if _, err := nm.Version(); err != nil {
		setupLog.Info("NetworkManager not available, enabling kernel mode",
			"error", err.Error())
		nmstatectl.SetKernelMode(true)
		return
	}

	setupLog.Info("NetworkManager detected, using standard mode")
}

// setupHandlerEnvironment cleans up unavailableNodeCounts after unexpected restart,
// configures the handler controllers and performs health checks
func setupHandlerEnvironment(mgr manager.Manager) error {
detectAndSetKernelMode()

if nmstatectl.IsKernelMode() {
if err := file.Touch("/tmp/kernel-mode"); err != nil {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The file path /tmp/kernel-mode is hardcoded. To improve maintainability and avoid magic strings, it would be better to define this path as a constant in a shared package (e.g., in pkg/environment or a new pkg/constants). This path is also used in the liveness probe definition in deploy/handler/operator.yaml and related tests, so a constant would ensure consistency.

setupLog.Error(err, "Failed to write kernel-mode flag file")
}
}

// Clean stale unavailable counts from node before starting controllers
// Prevents deadlock after unexpected cluster reboot where nodes were
// processing NNCP and left stale counts in etcd.
Expand Down
11 changes: 8 additions & 3 deletions controllers/handler/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,14 @@ func (r *NodeReconciler) getDependencyVersions() *nmstate.DependencyVersions {
r.Log.Error(err, "failed retrieving handler nmstate version")
}

hostNetworkManagerVersion, err := nm.Version()
if err != nil {
r.Log.Error(err, "error retrieving host Networkmanager version")
var hostNetworkManagerVersion string
if nmstatectl.IsKernelMode() {
hostNetworkManagerVersion = "N/A (kernel mode)"
} else {
hostNetworkManagerVersion, err = nm.Version()
if err != nil {
r.Log.Error(err, "error retrieving host Networkmanager version")
}
}

return &nmstate.DependencyVersions{
Expand Down
6 changes: 3 additions & 3 deletions controllers/operator/nmstate_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ var _ = Describe("NMState controller reconcile", func() {
ds := &appsv1.DaemonSet{}
err := cl.Get(context.Background(), handlerKey, ds)
Expect(err).ToNot(HaveOccurred())
expectedCommand := "nmstatectl show -vv 2>&1"
expectedCommand := "if [ -f /tmp/kernel-mode ]; then nmstatectl show -k -vv 2>&1; else nmstatectl show -vv 2>&1; fi"
Expect(ds.Spec.Template.Spec.Containers[0].LivenessProbe.Exec.Command).To(ContainElement(expectedCommand))
})
})
Expand Down Expand Up @@ -515,7 +515,7 @@ var _ = Describe("NMState controller reconcile", func() {
ds := &appsv1.DaemonSet{}
err := cl.Get(context.Background(), handlerKey, ds)
Expect(err).ToNot(HaveOccurred())
expectedCommand := "nmstatectl show 2>&1"
expectedCommand := "if [ -f /tmp/kernel-mode ]; then nmstatectl show -k 2>&1; else nmstatectl show 2>&1; fi"
Expect(ds.Spec.Template.Spec.Containers[0].LivenessProbe.Exec.Command).To(ContainElement(expectedCommand))
})
})
Expand All @@ -542,7 +542,7 @@ var _ = Describe("NMState controller reconcile", func() {
ds := &appsv1.DaemonSet{}
err := cl.Get(context.Background(), handlerKey, ds)
Expect(err).ToNot(HaveOccurred())
expectedCommand := "nmstatectl show 2>&1"
expectedCommand := "if [ -f /tmp/kernel-mode ]; then nmstatectl show -k 2>&1; else nmstatectl show 2>&1; fi"
Expect(ds.Spec.Template.Spec.Containers[0].LivenessProbe.Exec.Command).To(ContainElement(expectedCommand))
})
})
Expand Down
19 changes: 17 additions & 2 deletions deploy/handler/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,19 @@ spec:
tolerations: {{ toYaml .HandlerTolerations | nindent 8 }}
affinity: {{ toYaml .HandlerAffinity | nindent 8 }}
priorityClassName: system-node-critical
# Init container that creates /host-run/dbus and, when nothing exists at
# /host-run/dbus/system_bus_socket, an empty placeholder file at that path.
# It runs privileged with the handler image and the host-run volume mounted
# at /host-run.
# NOTE(review): presumably this lets the handler mount the D-Bus socket path on
# hosts that do not run D-Bus (kernel mode) — confirm.
initContainers:
- name: ensure-dbus-socket
image: {{ .HandlerImage }}
imagePullPolicy: {{ .HandlerPullPolicy }}
command:
- sh
- -c
- "mkdir -p /host-run/dbus && test -e /host-run/dbus/system_bus_socket || touch /host-run/dbus/system_bus_socket"
volumeMounts:
- name: host-run
mountPath: /host-run
securityContext:
privileged: true
containers:
- name: nmstate-handler
args:
Expand Down Expand Up @@ -445,17 +458,19 @@ spec:
command:
- bash
- -c
- "nmstatectl show {{ .HandlerReadinessProbeExtraArg }} 2>&1"
- "if [ -f /tmp/kernel-mode ]; then nmstatectl show -k {{ .HandlerReadinessProbeExtraArg }} 2>&1; else nmstatectl show {{ .HandlerReadinessProbeExtraArg }} 2>&1; fi"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The shell command for the liveness probe is complex and is duplicated in tests, which can be brittle and hard to maintain.

if [ -f /tmp/kernel-mode ]; then nmstatectl show -k {{ .HandlerReadinessProbeExtraArg }} 2>&1; else nmstatectl show {{ .HandlerReadinessProbeExtraArg }} 2>&1; fi

Consider adding a small wrapper script to the container image (e.g., /usr/bin/liveness-probe.sh) that encapsulates this logic. The liveness probe would then simply execute this script. This would make the YAML manifest cleaner and centralize the probe logic.

initialDelaySeconds: 60
periodSeconds: 60
timeoutSeconds: 10
successThreshold: 1
failureThreshold: 5
volumes:
- name: host-run
hostPath:
path: /run
- name: dbus-socket
hostPath:
path: /run/dbus/system_bus_socket
type: Socket
- name: nmstate-lock
hostPath:
path: /var/k8s_nmstate
Expand Down
9 changes: 9 additions & 0 deletions pkg/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,15 @@ func ApplyDesiredState(ctx context.Context, cli client.Client, desiredState shar
return "Ignoring empty desired state", nil
}

if nmstatectl.IsKernelMode() {
log.Info("Kernel mode: applying desired state without checkpoint/rollback/probes")
setOutput, err := nmstatectl.Set(desiredState, DesiredStateConfigurationTimeout)
if err != nil {
return setOutput, err
}
return fmt.Sprintf("setOutput: %s \n", setOutput), nil
}

// Before apply we get the probes that are working fine, they should be
// working fine after apply
probes := probe.Select(ctx, cli)
Expand Down
Loading
Loading