diff --git a/.gitignore b/.gitignore index 46e9ada602..7768f51bed 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,11 @@ docs/_build /tmp /graphviz /k3s-ansible +/rke2-ansible + +# Local Claude Code development files +.claude/ +.claude-flow/ +CLAUDE.md +*.claude-test.* +.claude-cache/ diff --git a/cmd/liqoctl/cmd/install.go b/cmd/liqoctl/cmd/install.go index d7bf8af01f..5fd2209d4f 100644 --- a/cmd/liqoctl/cmd/install.go +++ b/cmd/liqoctl/cmd/install.go @@ -31,6 +31,7 @@ import ( "github.com/liqotech/liqo/pkg/liqoctl/install/kind" "github.com/liqotech/liqo/pkg/liqoctl/install/kubeadm" "github.com/liqotech/liqo/pkg/liqoctl/install/openshift" + "github.com/liqotech/liqo/pkg/liqoctl/install/rke2" "github.com/liqotech/liqo/pkg/liqoctl/output" "github.com/liqotech/liqo/pkg/liqoctl/utils" "github.com/liqotech/liqo/pkg/liqoctl/version" @@ -188,6 +189,7 @@ func newInstallCommand(ctx context.Context, f *factory.Factory) *cobra.Command { utils.AddCommand(cmd, newInstallProviderCommand(ctx, options.CommonOptions, kind.New)) utils.AddCommand(cmd, newInstallProviderCommand(ctx, options.CommonOptions, kubeadm.New)) utils.AddCommand(cmd, newInstallProviderCommand(ctx, options.CommonOptions, openshift.New)) + utils.AddCommand(cmd, newInstallProviderCommand(ctx, options.CommonOptions, rke2.New)) return cmd } diff --git a/docs/installation/compatibility.md b/docs/installation/compatibility.md index 0fb3fcc3b5..430918496e 100644 --- a/docs/installation/compatibility.md +++ b/docs/installation/compatibility.md @@ -20,6 +20,7 @@ While the following list includes providers that we have specifically tested, Li | Kubeadm (Cilium) | ✅ | No known issues | | Kubeadm (Cilium with kube-proxy replacement) | 🟢 | `NodePortExposition` and `LoadBalancerExposition` | | K3s | 🟢 | `RemoteExec` | +| RKE2 | 🟢 | `RemoteExec` (same limitation as K3s); out-of-band peering available for restricted networks | | K0s | ✅ | No known issues | | AKS (Azure CNI Overlay) | 🟢 | `CrossClusterAPIServerInteraction` and `ExternalIPRemapping` | | AKS (Azure CNI (Legacy)) | 🟢 | `CrossClusterAPIServerInteraction` and `NodePortExposition` | diff --git a/docs/installation/install.md b/docs/installation/install.md index 2f0ac80735..2acb353e19 100644 --- a/docs/installation/install.md +++ b/docs/installation/install.md @@ -341,6 +341,52 @@ Alternatively, you can manually specify a desired id with the `--cluster-id` fla ```` +````{tab-item} RKE2 + +```{warning} +RKE2 shares limitations with K3s: +- `kubectl exec` may not work on pods scheduled to virtual nodes +- For restricted networks, use [out-of-band peering](../usage/rke2-oob-peering) +``` + +```{admonition} Note +RKE2 listens on port 9345 only for node registration (the supervisor port); the Kubernetes API server itself remains on the standard port 6443, and the kubeconfig generated by RKE2 (`/etc/rancher/rke2/rke2.yaml`) points to port 6443. +``` + +**Installation** + +Liqo can be installed on an RKE2 cluster with: + +```bash +liqoctl install rke2 +``` + +Override the API server URL if needed (e.g., NAT, load balancer): + +```bash +liqoctl install rke2 --api-server-url https://rke2.example.com:6443 +``` + +The cluster ID is auto-generated. Specify a custom ID with `--cluster-id`: + +```bash +liqoctl install rke2 --cluster-id my-rke2-cluster +``` + +**Out-of-Band Peering** + +For restricted networks where clusters cannot directly communicate, use manual peering: + +```bash +liqoctl install rke2 --cluster-id cluster-1 +# Repeat on second cluster with different ID +# Then follow the out-of-band peering guide +``` + +See [RKE2 out-of-band peering](../usage/rke2-oob-peering) for the complete procedure.
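+
+After the installation completes, a quick sanity check can be run before peering (generic Kubernetes commands, not RKE2-specific; `liqo` is the default installation namespace):
+
+```bash
+kubectl get pods -n liqo
+```
+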
+ +```` + ````{tab-item} KinD **Installation** diff --git a/docs/usage/rke2-oob-peering.md b/docs/usage/rke2-oob-peering.md new file mode 100644 index 0000000000..3b1c6e8077 --- /dev/null +++ b/docs/usage/rke2-oob-peering.md @@ -0,0 +1,148 @@ +# RKE2 Out-of-Band Peering + +This section describes how to establish peering between RKE2 clusters in restricted networks where `liqoctl peer` cannot be used (e.g., different networks, security policies, GitOps workflows). + +## Overview + +Out-of-band peering manually creates `ForeignCluster` resources on each cluster, enabling peering without direct cluster-to-cluster communication during setup. +This approach is essential when: + +- Clusters cannot directly communicate +- Using declarative GitOps workflows +- Different organizations manage each cluster + +## Prerequisites + +- Two RKE2 clusters with Liqo installed +- Secure method to exchange configuration (e.g., shared storage, secure transfer) + +```{admonition} Note +For standard peering where both clusters are accessible, use `liqoctl peer` instead. See [peer two clusters](/usage/peer). +``` + +## Install Liqo on both clusters + +First, install Liqo with explicit cluster IDs: + +```bash +# Consumer cluster +liqoctl install rke2 --cluster-id consumer-rke2 + +# Provider cluster +liqoctl install rke2 --cluster-id provider-rke2 +``` + +## Create ForeignCluster resources + +### On the consumer + +Create a `ForeignCluster` representing the provider: + +```yaml +apiVersion: core.liqo.io/v1beta1 +kind: ForeignCluster +metadata: + name: provider-rke2 + labels: + liqo.io/remote-cluster-id: provider-rke2 +spec: + clusterID: provider-rke2 + modules: + networking: {enabled: true} + authentication: {enabled: true} + offloading: {enabled: true} +``` + +```bash +kubectl apply -f consumer-foreigncluster.yaml +``` + +### On the provider + +Create a `ForeignCluster` representing the consumer: + +```yaml +apiVersion: core.liqo.io/v1beta1 +kind: ForeignCluster +metadata: + name: consumer-rke2 + labels: + liqo.io/remote-cluster-id: consumer-rke2 +spec: + clusterID: consumer-rke2 + modules: + networking: {enabled: true} + authentication: {enabled: true} + offloading: {enabled: true} +``` + +```bash +kubectl apply -f provider-foreigncluster.yaml +``` + +```{admonition} Important +The `liqo.io/remote-cluster-id` label improves lookup performance (O(1) vs O(n)). +``` + +## Exchange credentials + +The peering modules require manual credential exchange. 
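+
+In practice each module boils down to exporting an object created on one cluster and applying it on the other through your secure channel; a generic sketch of that pattern (the resource kind and name are placeholders, the exact objects are listed in the module guides below):
+
+```bash
+# On the source cluster: export the object produced by the module
+kubectl get <resource> <name> -n liqo -o yaml > exported.yaml
+# Transfer exported.yaml over the secure channel, then on the target cluster:
+kubectl apply -f exported.yaml
+```
+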
Refer to the individual module documentation: + +- [Networking](/advanced/peering/inter-cluster-network) - Gateway configuration +- [Authentication](/advanced/peering/inter-cluster-authentication) - Identity exchange +- [Offloading](/advanced/peering/offloading-in-depth) - ResourceSlice creation + +## Verify peering + +Check the `ForeignCluster` status: + +```bash +# Consumer +kubectl get foreigncluster -n liqo provider-rke2 -o yaml +kubectl get nodes -l liqo.io/type=virtual-node + +# Provider +kubectl get foreigncluster -n liqo consumer-rke2 -o yaml +kubectl get resourceslice -n liqo +``` + +## GitOps integration + +Store `ForeignCluster` manifests in Git and apply via your GitOps operator: + +``` +gitops-repo/ +├── clusters/ +│ ├── consumer-rke2/ +│ │ └── foreignclusters/ +│ │ └── provider-rke2.yaml +│ └── provider-rke2/ +│ └── foreignclusters/ +│ └── consumer-rke2.yaml +``` + +## Troubleshooting + +**ForeignCluster not found:** + +```bash +# Verify that the resource name matches the cluster ID +kubectl get foreigncluster -A + +# Add the label if missing (replace the placeholders with your resource name and cluster ID) +kubectl label foreigncluster <foreigncluster-name> liqo.io/remote-cluster-id=<cluster-id> -n liqo +``` + +**Networking issues:** + +```bash +kubectl get gatewayserver,gatewayclient -n liqo +kubectl logs -n liqo -l app.kubernetes.io/component=liqo-fabric +``` + +**Authentication failures:** + +```bash +kubectl get identity -n liqo +kubectl logs -n liqo -l app.kubernetes.io/name=controller-manager +``` diff --git a/pkg/liqoctl/install/rke2/provider.go b/pkg/liqoctl/install/rke2/provider.go new file mode 100644 index 0000000000..a19b00ed81 --- /dev/null +++ b/pkg/liqoctl/install/rke2/provider.go @@ -0,0 +1,86 @@ +// Copyright 2019-2025 The Liqo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package rke2 + +import ( + "context" + + "github.com/spf13/cobra" + + "github.com/liqotech/liqo/pkg/liqoctl/install" +) + +var _ install.Provider = (*Options)(nil) + +// Options encapsulates the arguments of the install rke2 command. +type Options struct { + *install.Options +} + +// New initializes a new Provider object. +func New(o *install.Options) install.Provider { + return &Options{Options: o} +} + +// Name returns the name of the provider. +func (o *Options) Name() string { return "rke2" } + +// Examples returns the examples string for the given provider.
+func (o *Options) Examples() string { + return `Examples: + $ {{ .Executable }} install rke2 --api-server-url https://liqo.example.local:6443 \ + --cluster-labels region=us-west,environment=production \ + --reserved-subnets 172.16.0.0/16,192.16.254.0/24 +or + $ {{ .Executable }} install rke2 --api-server-url https://liqo.example.local:6443 \ + --cluster-labels region=us-west,environment=production \ + --pod-cidr 10.0.0.0/16 --service-cidr 10.1.0.0/16 \ + --reserved-subnets 172.16.0.0/16,192.16.254.0/24 +or (with out-of-band peering for restricted networks) + $ {{ .Executable }} install rke2 --api-server-url https://liqo.example.local:6443 \ + --cluster-id my-rke2-cluster \ + --cluster-labels region=us-west,environment=production +` +} + +// RegisterFlags registers the flags for the given provider. +func (o *Options) RegisterFlags(cmd *cobra.Command) { + cmd.Flags().StringVar(&o.APIServer, "api-server-url", "", "The Kubernetes API Server URL (defaults to the one specified in the kubeconfig)") + cmd.Flags().StringVar(&o.PodCIDR, "pod-cidr", "10.42.0.0/16", "The Pod CIDR of the cluster") + cmd.Flags().StringVar(&o.ServiceCIDR, "service-cidr", "10.43.0.0/16", "The Service CIDR of the cluster") +} + +// Initialize performs the initialization tasks to retrieve the provider-specific parameters. +func (o *Options) Initialize(_ context.Context) error { + // RKE2-generated kubeconfigs typically reference localhost addresses (e.g., https://127.0.0.1:6443), + // which may differ from the URL reachable by other clusters. + // Disable API server sanity checks to support these scenarios. + o.DisableAPIServerSanityChecks = true + return nil +} + +// Values returns the customized provider-specific values file parameters. +func (o *Options) Values() map[string]interface{} { + return map[string]interface{}{ + "networking": map[string]interface{}{ + "fabric": map[string]interface{}{ + "config": map[string]interface{}{ + // RKE2 hosts commonly use nftables by default, and monitoring it can cause issues + // in some environments, similarly to K3s + "nftablesMonitor": false, + }, + }, + }, + } +} diff --git a/pkg/utils/foreigncluster/getters.go b/pkg/utils/foreigncluster/getters.go index 3d0ce96c9c..64e7a4c2a4 100644 --- a/pkg/utils/foreigncluster/getters.go +++ b/pkg/utils/foreigncluster/getters.go @@ -31,11 +31,33 @@ import ( ) // GetForeignClusterByID returns a ForeignCluster CR retrieving it by its clusterID. +// +// This function implements a three-tier fallback lookup strategy to support both +// standard label-based peering and out-of-band (manual) peering scenarios: +// +// 1. Label-based lookup (O(1) with index): Searches for ForeignClusters with the +// liqo.io/remote-cluster-id label matching the clusterID. This is the standard +// path used by liqoctl peer and is highly efficient. +// +// 2. Name-based lookup (O(1)): Fallback for out-of-band peering where ForeignCluster +// resources are created manually (e.g., via GitOps, kubectl apply) with name == clusterID. +// Common in RKE2 deployments and restricted network environments. +// +// 3. Exhaustive search (O(n)): Final fallback that iterates through ALL ForeignClusters +// to find one with spec.ClusterID matching the requested ID. This is expensive and +// should rarely be triggered in production. +// +// Performance considerations: +// - In clusters with many ForeignClusters (>100), the exhaustive search can impact +// API server performance. Consider adding liqo.io/remote-cluster-id labels to +// manually-created ForeignClusters to avoid this fallback.
+// - The function logs when fallback #2 or #3 is used to aid in debugging and +// identifying misconfigured resources. func GetForeignClusterByID(ctx context.Context, cl client.Client, clusterID liqov1beta1.ClusterID) (*liqov1beta1.ForeignCluster, error) { + // Fallback #1: Label-based lookup (most efficient, O(1) with index) lSelector := labels.SelectorFromSet(labels.Set{ consts.RemoteClusterID: string(clusterID), }) - // get the foreign cluster by clusterID label foreignClusterList := liqov1beta1.ForeignClusterList{} if err := cl.List(ctx, &foreignClusterList, &client.ListOptions{ LabelSelector: lSelector, @@ -43,7 +65,44 @@ func GetForeignClusterByID(ctx context.Context, cl client.Client, clusterID liqo return nil, err } - return getForeignCluster(&foreignClusterList, clusterID) + // If found by label, return immediately (fast path) + if len(foreignClusterList.Items) > 0 { + klog.V(4).Infof("Found ForeignCluster %s by label lookup (fast path)", clusterID) + return getForeignCluster(&foreignClusterList, clusterID) + } + + // Fallback #2: Name-based lookup for out-of-band peering (O(1)) + // This supports manually-created ForeignCluster resources where name == clusterID + klog.V(4).Infof("Label lookup failed for ForeignCluster %s, trying name-based lookup (out-of-band peering)", clusterID) + fc := &liqov1beta1.ForeignCluster{} + err := cl.Get(ctx, client.ObjectKey{Name: string(clusterID)}, fc) + if err != nil { + if !kerrors.IsNotFound(err) { + // Propagate unexpected API errors instead of masking them as NotFound. + return nil, err + } + // Fallback #3: Exhaustive search through all ForeignClusters (O(n)) + // WARNING: This is expensive and can impact performance in large clusters + klog.Warningf("Name-based lookup failed for ForeignCluster %s, performing exhaustive search across all ForeignClusters (expensive operation)", clusterID) + allFCs := &liqov1beta1.ForeignClusterList{} + if listErr := cl.List(ctx, allFCs); listErr != nil { + return nil, listErr + } + for i := range allFCs.Items { + if allFCs.Items[i].Spec.ClusterID == clusterID { + klog.Warningf("Found ForeignCluster %s via exhaustive search. Consider adding the %s label to this resource for better performance", + clusterID, consts.RemoteClusterID) + return &allFCs.Items[i], nil + } + } + return nil, kerrors.NewNotFound(liqov1beta1.ForeignClusterGroupResource, fmt.Sprintf("foreign cluster with ID %s", clusterID)) + } + + // Validate that the ForeignCluster found by name has the correct spec.ClusterID + if fc.Spec.ClusterID != "" && fc.Spec.ClusterID != clusterID { + klog.Warningf("ForeignCluster %s found by name but spec.ClusterID mismatch (expected: %s, got: %s)", fc.Name, clusterID, fc.Spec.ClusterID) + return nil, kerrors.NewNotFound(liqov1beta1.ForeignClusterGroupResource, fmt.Sprintf("foreign cluster with ID %s", clusterID)) + } + + klog.V(4).Infof("Found ForeignCluster %s by name-based lookup (out-of-band peering)", clusterID) + return fc, nil } // GetForeignClusterByIDWithDynamicClient returns a ForeignCluster CR retrieving it by its clusterID, using the dynamic interface.
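For reference, a minimal sketch of how a caller might consume this three-tier lookup (a hypothetical helper, not part of this change; it relies only on the `GetForeignClusterByID` signature above and on `kerrors.IsNotFound`):

```go
package example

import (
	"context"

	kerrors "k8s.io/apimachinery/pkg/api/errors"
	"sigs.k8s.io/controller-runtime/pkg/client"

	liqov1beta1 "github.com/liqotech/liqo/apis/core/v1beta1"
	fcutils "github.com/liqotech/liqo/pkg/utils/foreigncluster"
)

// lookupPeer resolves the ForeignCluster of a peer, tolerating out-of-band
// resources that lack the liqo.io/remote-cluster-id label, and distinguishes
// "peer not registered yet" (found == false) from genuine API errors.
func lookupPeer(ctx context.Context, cl client.Client, id liqov1beta1.ClusterID) (fc *liqov1beta1.ForeignCluster, found bool, err error) {
	fc, err = fcutils.GetForeignClusterByID(ctx, cl, id)
	if kerrors.IsNotFound(err) {
		// No ForeignCluster matched by label, name, or spec.ClusterID.
		return nil, false, nil
	}
	if err != nil {
		return nil, false, err
	}
	return fc, true, nil
}
```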
@@ -85,7 +144,7 @@ func GetOlderForeignCluster( var olderTime *metav1.Time for i := range foreignClusterList.Items { fc := &foreignClusterList.Items[i] - if olderTime.IsZero() || fc.CreationTimestamp.Before(olderTime) { + if olderTime == nil || fc.CreationTimestamp.Before(olderTime) { olderTime = &fc.CreationTimestamp foreignCluster = fc } diff --git a/pkg/utils/foreigncluster/getters_suite_test.go b/pkg/utils/foreigncluster/getters_suite_test.go new file mode 100644 index 0000000000..d12a12adee --- /dev/null +++ b/pkg/utils/foreigncluster/getters_suite_test.go @@ -0,0 +1,27 @@ +// Copyright 2019-2025 The Liqo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package foreigncluster_test + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" +) + +func TestForeignCluster(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "ForeignCluster Utils Suite") +} diff --git a/pkg/utils/foreigncluster/getters_test.go b/pkg/utils/foreigncluster/getters_test.go new file mode 100644 index 0000000000..250c6d6224 --- /dev/null +++ b/pkg/utils/foreigncluster/getters_test.go @@ -0,0 +1,280 @@ +// Copyright 2019-2025 The Liqo Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package foreigncluster_test + +import ( + "context" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + kerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + liqov1beta1 "github.com/liqotech/liqo/apis/core/v1beta1" + "github.com/liqotech/liqo/pkg/consts" + "github.com/liqotech/liqo/pkg/utils/foreigncluster" +) + +var _ = Describe("GetForeignClusterByID function", func() { + const ( + testClusterID1 = liqov1beta1.ClusterID("test-cluster-1") + testClusterID2 = liqov1beta1.ClusterID("test-cluster-2") + testClusterID3 = liqov1beta1.ClusterID("test-cluster-3") + testClusterID4 = liqov1beta1.ClusterID("test-cluster-4") + nonExistentID = liqov1beta1.ClusterID("non-existent-cluster") + ) + + var ( + ctx context.Context + scheme *runtime.Scheme + cl client.Client + ) + + BeforeEach(func() { + ctx = context.Background() + scheme = runtime.NewScheme() + Expect(liqov1beta1.AddToScheme(scheme)).To(Succeed()) + }) + + Context("Fallback #1: Label-based lookup (fast path)", func() { + BeforeEach(func() { + // Create a ForeignCluster with the standard label + fc := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fc-with-label", + Labels: map[string]string{ + consts.RemoteClusterID: string(testClusterID1), + }, + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID1, + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fc).Build() + }) + + It("should find ForeignCluster by label", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID1) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + Expect(fc.Spec.ClusterID).To(Equal(testClusterID1)) + Expect(fc.Name).To(Equal("fc-with-label")) + }) + + It("should return not found for non-existent cluster", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, nonExistentID) + Expect(err).To(HaveOccurred()) + Expect(kerrors.IsNotFound(err)).To(BeTrue()) + Expect(fc).To(BeNil()) + }) + }) + + Context("Fallback #2: Name-based lookup (out-of-band peering)", func() { + BeforeEach(func() { + // Create a ForeignCluster without the label, but with name == clusterID + // This simulates manual/out-of-band creation + fc := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(testClusterID2), // Name matches clusterID + // Note: No liqo.io/remote-cluster-id label + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID2, + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fc).Build() + }) + + It("should find ForeignCluster by name when label is missing", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID2) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + Expect(fc.Spec.ClusterID).To(Equal(testClusterID2)) + Expect(fc.Name).To(Equal(string(testClusterID2))) + }) + + It("should validate spec.ClusterID matches requested ID", func() { + // Create a ForeignCluster where name matches but spec.ClusterID doesn't + fcMismatch := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(testClusterID3), + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: "different-cluster-id", // Mismatch! 
+ }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fcMismatch).Build() + + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID3) + Expect(err).To(HaveOccurred()) + Expect(kerrors.IsNotFound(err)).To(BeTrue()) + Expect(fc).To(BeNil()) + }) + }) + + Context("Fallback #3: Exhaustive search (expensive operation)", func() { + BeforeEach(func() { + // Create multiple ForeignClusters, none with the correct label or name + // Only one has the matching spec.ClusterID + fc1 := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "random-name-1", + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: "some-other-cluster", + }, + } + fc2 := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "random-name-2", + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID4, // This is the one we're looking for + }, + } + fc3 := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "random-name-3", + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: "yet-another-cluster", + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fc1, fc2, fc3).Build() + }) + + It("should find ForeignCluster via exhaustive search when label and name lookups fail", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID4) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + Expect(fc.Spec.ClusterID).To(Equal(testClusterID4)) + Expect(fc.Name).To(Equal("random-name-2")) + }) + + It("should return not found when exhaustive search finds nothing", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, nonExistentID) + Expect(err).To(HaveOccurred()) + Expect(kerrors.IsNotFound(err)).To(BeTrue()) + Expect(fc).To(BeNil()) + }) + }) + + Context("Multiple ForeignClusters with same label", func() { + var ( + fc1 *liqov1beta1.ForeignCluster + fc2 *liqov1beta1.ForeignCluster + ) + + BeforeEach(func() { + // Create two ForeignClusters with the same label + // The function should return the older one based on creationTimestamp + fc1 = &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fc-older", + Labels: map[string]string{ + consts.RemoteClusterID: string(testClusterID1), + }, + CreationTimestamp: metav1.Time{Time: metav1.Now().Add(-24 * time.Hour)}, // 1 day ago + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID1, + }, + } + fc2 = &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fc-newer", + Labels: map[string]string{ + consts.RemoteClusterID: string(testClusterID1), + }, + CreationTimestamp: metav1.Now(), // Now + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID1, + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fc1, fc2).Build() + }) + + It("should return the older ForeignCluster when multiple exist", func() { + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID1) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + Expect(fc.Name).To(Equal("fc-older")) + }) + }) + + Context("Edge cases", func() { + It("should handle empty clusterID gracefully", func() { + cl = fake.NewClientBuilder().WithScheme(scheme).Build() + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, "") + Expect(err).To(HaveOccurred()) + Expect(fc).To(BeNil()) + }) + + It("should handle ForeignCluster with empty spec.ClusterID in name-based lookup", func() { + // ForeignCluster with name matching but empty spec.ClusterID should still be found + fcEmpty := 
&liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(testClusterID2), + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: "", // Empty + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fcEmpty).Build() + + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID2) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + Expect(fc.Name).To(Equal(string(testClusterID2))) + }) + }) + + Context("Combination scenarios (testing fallback chain)", func() { + It("should prefer label-based lookup over name-based when both exist", func() { + // Create two ForeignClusters: one with label, one matching by name + fcWithLabel := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "fc-with-label", + Labels: map[string]string{ + consts.RemoteClusterID: string(testClusterID1), + }, + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID1, + }, + } + fcByName := &liqov1beta1.ForeignCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: string(testClusterID1), // Name matches + }, + Spec: liqov1beta1.ForeignClusterSpec{ + ClusterID: testClusterID1, + }, + } + cl = fake.NewClientBuilder().WithScheme(scheme).WithObjects(fcWithLabel, fcByName).Build() + + fc, err := foreigncluster.GetForeignClusterByID(ctx, cl, testClusterID1) + Expect(err).ToNot(HaveOccurred()) + Expect(fc).ToNot(BeNil()) + // Should return the one found by label (fast path) + Expect(fc.Name).To(Equal("fc-with-label")) + }) + }) +}) diff --git a/test/e2e/pipeline/infra/rke2/.gitignore b/test/e2e/pipeline/infra/rke2/.gitignore new file mode 100644 index 0000000000..699d734c6f --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/.gitignore @@ -0,0 +1,2 @@ +k3s-ansible/ +inventory.yml diff --git a/test/e2e/pipeline/infra/rke2/ansible-blocking-io.py b/test/e2e/pipeline/infra/rke2/ansible-blocking-io.py new file mode 100644 index 0000000000..6eb25a8a71 --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/ansible-blocking-io.py @@ -0,0 +1,18 @@ +#!/usr/bin/python3 + +# This python script is to fix ansible error in CI test. 
It is not a bug of ansible itself, but a side-effect of a different change. +# This workaround will be removed once the ansible errors no longer appear in CI + +import os +import sys + +for handle in (sys.stdin, sys.stdout, sys.stderr): + try: + fd = handle.fileno() + except Exception as e: + print(f"Error: {e}") + continue + + os.set_blocking(fd, True) + +print("Blocking I/O is set for all file descriptors") diff --git a/test/e2e/pipeline/infra/rke2/clean.sh b/test/e2e/pipeline/infra/rke2/clean.sh new file mode 100755 index 0000000000..731bd5b8b2 --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/clean.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# This script expects the following variables to be set: +# CLUSTER_NUMBER -> the number of liqo clusters +# K8S_VERSION -> the Kubernetes version +# CNI -> the CNI plugin used +# TMPDIR -> the directory where the test-related files are stored +# BINDIR -> the directory where the test-related binaries are stored +# TEMPLATE_DIR -> the directory where to read the cluster templates +# NAMESPACE -> the namespace where liqo is running +# KUBECONFIGDIR -> the directory where the kubeconfigs are stored +# LIQO_VERSION -> the liqo version to test +# INFRA -> the Kubernetes provider for the infrastructure +# LIQOCTL -> the path where liqoctl is stored +# KUBECTL -> the path where kubectl is stored +# POD_CIDR_OVERLAPPING -> the pod CIDR of the clusters is overlapping +# CLUSTER_TEMPLATE_FILE -> the file where the cluster template is stored + +set -e # Fail in case of error +set -o nounset # Fail if undefined variables are used +set -o pipefail # Fail if one of the piped commands fails + +error() { + local sourcefile=$1 + local lineno=$2 + echo "An error occurred at $sourcefile:$lineno." +} +trap 'error "${BASH_SOURCE}" "${LINENO}"' ERR + +FILEPATH=$(realpath "$0") +WORKDIR=$(dirname "$FILEPATH") + +# shellcheck disable=SC1091 +# shellcheck source=../../utils.sh +source "$WORKDIR/../../utils.sh" + +TARGET_NAMESPACE="liqo-ci" + +for i in $(seq 1 "${CLUSTER_NUMBER}"); +do + RKE2_CLUSTER_NAME=$(forge_clustername "${i}") + echo "Deleting cluster ${RKE2_CLUSTER_NAME}" + "${KUBECTL}" delete -n "${TARGET_NAMESPACE}" vms "${RKE2_CLUSTER_NAME}-control-plane" --ignore-not-found + "${KUBECTL}" delete -n "${TARGET_NAMESPACE}" vms "${RKE2_CLUSTER_NAME}-worker-1" --ignore-not-found + "${KUBECTL}" delete -n "${TARGET_NAMESPACE}" vms "${RKE2_CLUSTER_NAME}-worker-2" --ignore-not-found +done diff --git a/test/e2e/pipeline/infra/rke2/inventory.template.yml b/test/e2e/pipeline/infra/rke2/inventory.template.yml new file mode 100644 index 0000000000..2cfbe0a257 --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/inventory.template.yml @@ -0,0 +1,38 @@ +--- +all: + children: + rke2_cluster: + children: + rke2_servers: + hosts: + ${CONTROL_PLANE_IP}: + rke2_agents: + hosts: + ${WORKER_1_IP}: + ${WORKER_2_IP}: + + # Required Vars + vars: + ansible_port: 22 + ansible_user: ubuntu + rke2_version: ${K8S_VERSION}+rke2r1 + # The token should be a random string of reasonable length. You can generate + # one with the following commands: + # - openssl rand -base64 64 + # - pwgen -s 64 1 + # You can use ansible-vault to encrypt this value / keep it secret. + rke2_token: "changeme!"
+ rke2_api_ip: "{{ hostvars[groups['rke2_servers'][0]]['ansible_host'] | default(groups['rke2_servers'][0]) }}" + rke2_config: + cluster-cidr: "${POD_CIDR}" + service-cidr: "${SERVICE_CIDR}" + tls-san: + - "{{ rke2_api_ip }}" + + # Optional vars + # rke2_channel: stable + # rke2_download_kubeconf: true + # rke2_ha_mode: false + # rke2_ha_mode_keepalived: true + # rke2_keepalived_iface: eth0 + # rke2_keepalived_vip: 192.168.1.100 diff --git a/test/e2e/pipeline/infra/rke2/pre-requirements.sh b/test/e2e/pipeline/infra/rke2/pre-requirements.sh new file mode 100755 index 0000000000..a0f43aa0ab --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/pre-requirements.sh @@ -0,0 +1,73 @@ +#!/bin/bash +#shellcheck disable=SC1091 + +# This script expects the following variables to be set: +# CLUSTER_NUMBER -> the number of liqo clusters +# K8S_VERSION -> the Kubernetes version +# CNI -> the CNI plugin used +# TMPDIR -> the directory where the test-related files are stored +# BINDIR -> the directory where the test-related binaries are stored +# TEMPLATE_DIR -> the directory where to read the cluster templates +# NAMESPACE -> the namespace where liqo is running +# KUBECONFIGDIR -> the directory where the kubeconfigs are stored +# LIQO_VERSION -> the liqo version to test +# INFRA -> the Kubernetes provider for the infrastructure +# LIQOCTL -> the path where liqoctl is stored +# KUBECTL -> the path where kubectl is stored +# HELM -> the path where helm is stored +# POD_CIDR_OVERLAPPING -> the pod CIDR of the clusters is overlapping +# CLUSTER_TEMPLATE_FILE -> the file where the cluster template is stored + +set -e # Fail in case of error +set -o nounset # Fail if undefined variables are used +set -o pipefail # Fail if one of the piped commands fails + +error() { + local sourcefile=$1 + local lineno=$2 + echo "An error occurred at $sourcefile:$lineno." +} +trap 'error "${BASH_SOURCE}" "${LINENO}"' ERR + +FILEPATH=$(realpath "$0") +WORKDIR=$(dirname "$FILEPATH") + +# shellcheck disable=SC1091 +# shellcheck source=../../utils.sh +source "$WORKDIR/../../utils.sh" + +setup_arch_and_os + +# RKE2 uses the same architecture support as K3s. +# The kubectl supported architecture list is a superset of the RKE2 one, so no further compatibility check is needed. +SUPPORTED="darwin-amd64\ndarwin-arm64\nlinux-386\nlinux-amd64\nlinux-arm\nlinux-arm64\nwindows-amd64" +check_supported_arch_and_os "${SUPPORTED}" "${OS}" "${ARCH}" rke2 + +# shellcheck disable=SC2153 +install_kubectl "${OS}" "${ARCH}" "${K8S_VERSION}" + +install_helm "${OS}" "${ARCH}" + +# install ansible + +# ensure pipx is installed +if ! command -v pipx &> /dev/null; then + python3 -m pip install --user pipx + python3 -m pipx ensurepath --force + source "$HOME/.bashrc" || true + + sudo apt update + sudo apt install -y python3-venv +fi + +# ensure envsubst is installed +if ! command -v envsubst &> /dev/null; then + sudo apt update + sudo apt install -y gettext +fi + +# ensure ansible is installed +if ! command -v ansible &> /dev/null; then
+ pipx install --include-deps ansible + ansible-playbook --version +fi diff --git a/test/e2e/pipeline/infra/rke2/setup.sh b/test/e2e/pipeline/infra/rke2/setup.sh new file mode 100755 index 0000000000..f886a8946e --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/setup.sh @@ -0,0 +1,147 @@ +#!/bin/bash + +# This script expects the following variables to be set: +# CLUSTER_NUMBER -> the number of liqo clusters +# K8S_VERSION -> the Kubernetes version +# CNI -> the CNI plugin used +# TMPDIR -> the directory where the test-related files are stored +# BINDIR -> the directory where the test-related binaries are stored +# TEMPLATE_DIR -> the directory where to read the cluster templates +# NAMESPACE -> the namespace where liqo is running +# KUBECONFIGDIR -> the directory where the kubeconfigs are stored +# LIQO_VERSION -> the liqo version to test +# INFRA -> the Kubernetes provider for the infrastructure +# LIQOCTL -> the path where liqoctl is stored +# KUBECTL -> the path where kubectl is stored +# POD_CIDR_OVERLAPPING -> the pod CIDR of the clusters is overlapping +# CLUSTER_TEMPLATE_FILE -> the file where the cluster template is stored + +set -e # Fail in case of error +set -o nounset # Fail if undefined variables are used +set -o pipefail # Fail if one of the piped commands fails + +error() { + local sourcefile=$1 + local lineno=$2 + echo "An error occurred at $sourcefile:$lineno." +} +trap 'error "${BASH_SOURCE}" "${LINENO}"' ERR + +FILEPATH=$(realpath "$0") +WORKDIR=$(dirname "$FILEPATH") + +# shellcheck disable=SC1091 +source "$HOME/.bashrc" || true + +# shellcheck disable=SC1091 +# shellcheck source=../../utils.sh +source "$WORKDIR/../../utils.sh" + +check_host_login() { + local host=$1 + local user=$2 + local key=$3 + local timeout=${4:-"600"} + + s=$(date +%s) + local start=${s} + while true; do + if ssh -i "${key}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 "${user}@${host}" exit; then + break + fi + if [[ $(( $(date +%s) - start )) -gt ${timeout} ]]; then + echo "Timeout reached while waiting for the host to be reachable" + exit 1 + fi + sleep 5 + done + + sleep 5 + + # check apt is able to take the lock + start=$(date +%s) + while true; do + if ssh -i "${key}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 "${user}@${host}" sudo apt update; then + break + fi + if [[ $(( $(date +%s) - start )) -gt ${timeout} ]]; then + echo "Timeout reached while waiting for apt to be available" + exit 1 + fi + sleep 5 + done +} + +TARGET_NAMESPACE="liqo-ci" + +# Resolve to an absolute path, so that it keeps working after the cd into rke2-ansible below. +BASE_DIR=$(dirname "$(realpath "$0")") + +export SERVICE_CIDR=10.100.0.0/16 +export POD_CIDR=10.200.0.0/16 +export POD_CIDR_OVERLAPPING=${POD_CIDR_OVERLAPPING:-"false"} + +for i in $(seq 1 "${CLUSTER_NUMBER}"); +do + RKE2_CLUSTER_NAME=$(forge_clustername "${i}") + echo "Creating cluster ${RKE2_CLUSTER_NAME}" + CLUSTER_NAME="$RKE2_CLUSTER_NAME" envsubst < "$BASE_DIR/vms.template.yaml" | "${KUBECTL}" apply -n "${TARGET_NAMESPACE}" -f - +done + +# Wait for the clusters to be ready +for i in $(seq 1 "${CLUSTER_NUMBER}"); +do + RKE2_CLUSTER_NAME=$(forge_clustername "${i}") + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vm "${RKE2_CLUSTER_NAME}-control-plane" -n "${TARGET_NAMESPACE}" + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vm "${RKE2_CLUSTER_NAME}-worker-1" -n "${TARGET_NAMESPACE}" + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vm "${RKE2_CLUSTER_NAME}-worker-2" -n "${TARGET_NAMESPACE}" + + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vmi "${RKE2_CLUSTER_NAME}-control-plane" -n "${TARGET_NAMESPACE}"
"${RKE2_CLUSTER_NAME}-control-plane" -n "${TARGET_NAMESPACE}" + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vmi "${RKE2_CLUSTER_NAME}-worker-1" -n "${TARGET_NAMESPACE}" + "${KUBECTL}" wait --for=condition=Ready --timeout=20m vmi "${RKE2_CLUSTER_NAME}-worker-2" -n "${TARGET_NAMESPACE}" +done + +SSH_KEY_FILE="${TMPDIR}/id_rsa" +echo "${SSH_KEY_PATH}" > "${SSH_KEY_FILE}" +chmod 600 "${SSH_KEY_FILE}" + +rm -rf rke2-ansible || true +git clone https://github.com/rancherfederal/rke2-ansible.git +cd rke2-ansible + +for i in $(seq 1 "${CLUSTER_NUMBER}"); +do + RKE2_CLUSTER_NAME=$(forge_clustername "${i}") + + if [[ ${POD_CIDR_OVERLAPPING} != "true" ]]; then + # this should avoid the ipam to reserve a pod CIDR of another cluster as local external CIDR causing remapping + export POD_CIDR="10.$((i * 10)).0.0/16" + fi + + _CONTROL_PLANE_IP=$("${KUBECTL}" get vmi "${RKE2_CLUSTER_NAME}-control-plane" -n "${TARGET_NAMESPACE}" -o jsonpath='{.status.interfaces[0].ipAddress}') + _WORKER_1_IP=$("${KUBECTL}" get vmi "${RKE2_CLUSTER_NAME}-worker-1" -n "${TARGET_NAMESPACE}" -o jsonpath='{.status.interfaces[0].ipAddress}') + _WORKER_2_IP=$("${KUBECTL}" get vmi "${RKE2_CLUSTER_NAME}-worker-2" -n "${TARGET_NAMESPACE}" -o jsonpath='{.status.interfaces[0].ipAddress}') + export CONTROL_PLANE_IP="${_CONTROL_PLANE_IP}" + export WORKER_1_IP="${_WORKER_1_IP}" + export WORKER_2_IP="${_WORKER_2_IP}" + + check_host_login "${CONTROL_PLANE_IP}" "ubuntu" "${SSH_KEY_FILE}" + check_host_login "${WORKER_1_IP}" "ubuntu" "${SSH_KEY_FILE}" + check_host_login "${WORKER_2_IP}" "ubuntu" "${SSH_KEY_FILE}" + + # if running in GitHub Actions + if [[ -n "${GITHUB_ACTIONS}" ]]; then + sudo python3 "${BASE_DIR}/ansible-blocking-io.py" + fi + + ansible-playbook --version + envsubst < "$BASE_DIR/inventory.template.yml" > inventory.yml + ansible-playbook playbooks/site.yml -i inventory.yml --key-file "${SSH_KEY_FILE}" + + mkdir -p "${TMPDIR}/kubeconfigs" + scp -i "${SSH_KEY_FILE}" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null ubuntu@"${CONTROL_PLANE_IP}":~/.kube/config "${TMPDIR}/kubeconfigs/liqo_kubeconf_${i}" + sed -i "s/127.0.0.1/${CONTROL_PLANE_IP}/g" "${TMPDIR}/kubeconfigs/liqo_kubeconf_${i}" + + # add default namespace to kubeconfig + KUBECONFIG="${TMPDIR}/kubeconfigs/liqo_kubeconf_${i}" "${KUBECTL}" config set-context --current --namespace=default +done + +cd .. 
diff --git a/test/e2e/pipeline/infra/rke2/vms.template.yaml b/test/e2e/pipeline/infra/rke2/vms.template.yaml new file mode 100644 index 0000000000..cd15259ca8 --- /dev/null +++ b/test/e2e/pipeline/infra/rke2/vms.template.yaml @@ -0,0 +1,153 @@ +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: liqo-ci +spec: + running: true + template: + metadata: + labels: + kubevirt.io/vmpool: control-plane + spec: + domain: + cpu: + cores: 2 + devices: + disks: + - disk: + bus: virtio + name: root + - disk: + bus: virtio + name: cloud-init + networkInterfaceMultiqueue: true + machine: + type: q35 + resources: + requests: + memory: 4Gi + volumes: + - dataVolume: + name: ${CLUSTER_NAME}-control-plane-data-volume + name: root + - name: cloud-init + cloudInitNoCloud: + secretRef: + name: cloud-init + dataVolumeTemplates: + - metadata: + creationTimestamp: null + name: ${CLUSTER_NAME}-control-plane-data-volume + spec: + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 8Gi + source: + registry: + url: docker://ghcr.io/liqotech/ubuntu:22.04 + +--- + +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: ${CLUSTER_NAME}-worker-1 + namespace: liqo-ci +spec: + running: true + template: + spec: + domain: + cpu: + cores: 2 + devices: + disks: + - disk: + bus: virtio + name: root + - disk: + bus: virtio + name: cloud-init + networkInterfaceMultiqueue: true + machine: + type: q35 + resources: + requests: + memory: 4Gi + volumes: + - dataVolume: + name: ${CLUSTER_NAME}-worker-1-data-volume + name: root + - name: cloud-init + cloudInitNoCloud: + secretRef: + name: cloud-init + dataVolumeTemplates: + - metadata: + creationTimestamp: null + name: ${CLUSTER_NAME}-worker-1-data-volume + spec: + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 8Gi + source: + registry: + url: docker://ghcr.io/liqotech/ubuntu:22.04 + +--- + +apiVersion: kubevirt.io/v1 +kind: VirtualMachine +metadata: + name: ${CLUSTER_NAME}-worker-2 + namespace: liqo-ci +spec: + running: true + template: + spec: + domain: + cpu: + cores: 2 + devices: + disks: + - disk: + bus: virtio + name: root + - disk: + bus: virtio + name: cloud-init + networkInterfaceMultiqueue: true + machine: + type: q35 + resources: + requests: + memory: 4Gi + volumes: + - dataVolume: + name: ${CLUSTER_NAME}-worker-2-data-volume + name: root + - name: cloud-init + cloudInitNoCloud: + secretRef: + name: cloud-init + dataVolumeTemplates: + - metadata: + creationTimestamp: null + name: ${CLUSTER_NAME}-worker-2-data-volume + spec: + storage: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 8Gi + source: + registry: + url: docker://ghcr.io/liqotech/ubuntu:22.04
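+
+# For reference: setup.sh instantiates this template once per cluster through
+# envsubst, substituting CLUSTER_NAME, e.g.:
+#   CLUSTER_NAME=cluster1 envsubst < vms.template.yaml | kubectl apply -n liqo-ci -f -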