Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/onboarding-with-existing-gke-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ module "castai_omni_cluster" {
pod_cidr = data.google_container_cluster.gke.cluster_ipv4_cidr
service_cidr = data.google_container_cluster.gke.services_ipv4_cidr
reserved_subnet_cidrs = [data.google_compute_subnetwork.gke_subnet.ip_cidr_range]

skip_helm = var.skip_helm
}

module "castai_omni_edge_location_gcp" {
Expand Down
10 changes: 10 additions & 0 deletions examples/onboarding-with-existing-gke-cluster/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ terraform {
source = "hashicorp/helm"
version = ">= 2.0"
}
kubernetes = {
source = "hashicorp/kubernetes"
version = ">= 2.35.0"
}
}
}

Expand All @@ -29,6 +33,12 @@ provider "helm" {
}
}

# Kubernetes provider targeting the existing GKE cluster read through
# data.google_container_cluster.gke. It authenticates with the access token
# exposed by data.google_client_config.default.
provider "kubernetes" {
  host  = "https://${data.google_container_cluster.gke.endpoint}"
  token = data.google_client_config.default.access_token

  # The CA certificate is base64-decoded before being handed to the provider.
  # Bracket indexing is used instead of the legacy `.0` attribute-style index.
  cluster_ca_certificate = base64decode(
    data.google_container_cluster.gke.master_auth[0].cluster_ca_certificate
  )
}

provider "castai" {
api_token = var.castai_api_token
api_url = var.castai_api_url
Expand Down
6 changes: 6 additions & 0 deletions examples/onboarding-with-existing-gke-cluster/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ variable "cluster_id" {
description = "Cast AI Cluster ID"
type = string
}

# Toggle forwarded to the castai_omni_cluster module as `skip_helm`.
variable "skip_helm" {
  description = "Skip installing any helm release; allows managing helm releases using GitOps"
  type        = bool
  default     = false

  # An explicit null falls back to the default instead of reaching the
  # `var.skip_helm ? ... : ...` conditionals, which cannot evaluate null.
  nullable = false
}
214 changes: 90 additions & 124 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,7 @@ check "reserved_cidrs_required_for_gke" {
}

locals {
liqo_chart_repo = "https://castai.github.io/liqo"
liqo_chart_name = "liqo"
liqo_release_name = "omni"
liqo_image_tag = var.liqo_chart_version

omni_namespace = "castai-omni"
omni_agent_release = "omni-agent"
omni_agent_chart = "omni-agent"
castai_helm_repository = "https://castai.github.io/helm-charts"

# Common Liqo configurations as YAML
common_liqo_yaml_values = <<-EOT
networking:
fabric:
config:
healthProbeBindAddressPort: '7071'
metricsAddressPort: '7072'
EOT

# Select the appropriate set_values based on k8s_provider
provider_helm_values = merge(
{ for v in module.liqo_helm_values_gke : "gke" => v.set_values },
{ for v in module.liqo_helm_values_eks : "eks" => v.set_values },
{ for v in module.liqo_helm_values_aks : "aks" => v.set_values },
)
provider_specific_liqo_values = local.provider_helm_values[var.k8s_provider]
liqo_image_tag = var.liqo_chart_version
}

# GKE-specific Liqo Helm chart configuration
Expand Down Expand Up @@ -76,120 +51,66 @@ module "liqo_helm_values_aks" {
service_cidr = var.service_cidr
}

resource "helm_release" "liqo" {
name = local.liqo_release_name
repository = local.liqo_chart_repo
chart = local.liqo_chart_name
version = var.liqo_chart_version
namespace = local.omni_namespace
create_namespace = true
cleanup_on_fail = true
wait = true

values = [local.common_liqo_yaml_values]
set = local.provider_specific_liqo_values
}
locals {
liqo_chart_repo = "https://castai.github.io/liqo"
liqo_chart_name = "liqo"
liqo_release_name = "omni"

# Wait for Liqo network resources to be ready before proceeding
resource "null_resource" "wait_for_liqo_network" {
provisioner "local-exec" {
command = <<-EOT
set -e

echo "Waiting for Liqo networks.ipam.liqo.io CRD to be established..."
kubectl wait --for condition=established --timeout=300s crd/networks.ipam.liqo.io

echo "Waiting for external CIDR network resource to be created..."
timeout=300
elapsed=0
interval=5

while [ $elapsed -lt $timeout ]; do
CIDR=$(kubectl get networks.ipam.liqo.io -n ${local.omni_namespace} \
-l ipam.liqo.io/network-type=external-cidr \
-o jsonpath='{.items[0].status.cidr}' 2>/dev/null || echo "")

if [ -n "$CIDR" ]; then
echo "External CIDR network resource is ready: $CIDR"
exit 0
fi

echo "Waiting for external CIDR to be populated... ($elapsed/$timeout seconds)"
sleep $interval
elapsed=$((elapsed + interval))
done

echo "Timeout waiting for external CIDR network resource"
exit 1
EOT
}
omni_namespace = "castai-omni"
omni_agent_release = "castai-omni-agent"
omni_agent_chart = "omni-agent"
castai_helm_repository = "https://castai.github.io/helm-charts"

depends_on = [helm_release.liqo]
}
# Omni agent configuration as YAML
omni_agent_yaml_values = <<-EOT
castai:
apiUrl: ${var.api_url}
organizationID: ${var.organization_id}
clusterID: ${var.cluster_id}
clusterName: ${var.cluster_name}
EOT

# Extract the external CIDR value from Liqo network resource
data "external" "liqo_external_cidr" {
program = ["bash", "-c", <<-EOT
CIDR=$(kubectl get networks.ipam.liqo.io -n ${local.omni_namespace} \
-l ipam.liqo.io/network-type=external-cidr \
-o jsonpath='{.items[0].status.cidr}' 2>/dev/null)

if [ -z "$CIDR" ]; then
echo '{"cidr":""}'
else
echo "{\"cidr\":\"$CIDR\"}"
fi
# Common Liqo configuration as YAML
common_liqo_yaml_values = <<-EOT
networking:
fabric:
config:
healthProbeBindAddressPort: '7071'
metricsAddressPort: '7072'
EOT
]

depends_on = [null_resource.wait_for_liqo_network]
# Select the appropriate yaml_values based on k8s_provider
provider_yaml_values = merge(
{ for v in module.liqo_helm_values_gke : "gke" => v.liqo_yaml_values },
{ for v in module.liqo_helm_values_eks : "eks" => v.liqo_yaml_values },
{ for v in module.liqo_helm_values_aks : "aks" => v.liqo_yaml_values },
)
provider_specific_yaml_values = local.provider_yaml_values[var.k8s_provider]

helm_yaml_values = {
castai = {
apiUrl = var.api_url
organizationID = var.organization_id
clusterID = var.cluster_id
clusterName = var.cluster_name
}
liqo = local.provider_specific_yaml_values.liqo
}
}

# CAST AI Omni Agent Helm Release
resource "helm_release" "omni_agent" {
count = var.skip_helm ? 0 : 1

name = local.omni_agent_release
repository = local.castai_helm_repository
chart = local.omni_agent_chart
namespace = local.omni_namespace
create_namespace = true
create_namespace = false
cleanup_on_fail = true
wait = true

set = [
{
name = "network.externalCIDR"
value = data.external.liqo_external_cidr.result.cidr
},
{
name = "network.podCIDR"
value = var.pod_cidr
},
{
name = "castai.apiUrl"
value = var.api_url
},
{
name = "castai.organizationID"
value = var.organization_id
},
{
name = "castai.clusterID"
value = var.cluster_id
},
{
name = "castai.clusterName"
value = var.cluster_name
}
]

set_sensitive = [
{
name = "castai.apiKey"
value = var.api_token
}
]

depends_on = [null_resource.wait_for_liqo_network]
values = [yamlencode(local.helm_yaml_values)]
}

# Enabling CAST AI Omni functionality for a given cluster
Expand All @@ -199,3 +120,48 @@ resource "castai_omni_cluster" "this" {

depends_on = [helm_release.omni_agent]
}

# Namespace named after local.omni_namespace, managed by Terraform only when
# skip_helm = true (count is 0 otherwise).
#
# NOTE(review): the helm_release.omni_agent in this change appears to end up
# with create_namespace = false - confirm that something else creates the
# namespace when skip_helm = false.
resource "kubernetes_namespace_v1" "omni" {
  count = var.skip_helm ? 1 : 0

  metadata {
    name = local.omni_namespace
  }
}

# Secret with API token for GitOps (when skip_helm = true).
# Stores var.api_token under the CASTAI_AGENT_TOKEN key.
resource "kubernetes_secret_v1" "api_token" {
  count = var.skip_helm ? 1 : 0

  metadata {
    name = "castai-omni-agent-token"

    # Referencing the namespace resource (same count condition, so index 0
    # exists whenever this secret does) gives Terraform an implicit
    # dependency, replacing the explicit depends_on.
    namespace = kubernetes_namespace_v1.omni[0].metadata[0].name
  }

  data = {
    "CASTAI_AGENT_TOKEN" = var.api_token
  }
}

# ConfigMap with helm values for GitOps (when skip_helm = true).
# Publishes the chart coordinates and the rendered values document.
resource "kubernetes_config_map_v1" "helm_values" {
  count = var.skip_helm ? 1 : 0

  metadata {
    name = "castai-omni-helm-values"

    # Referencing the namespace resource (same count condition, so index 0
    # exists whenever this ConfigMap does) gives Terraform an implicit
    # dependency, replacing the explicit depends_on.
    namespace = kubernetes_namespace_v1.omni[0].metadata[0].name
  }

  data = {
    # Liqo chart coordinates.
    "liqo.repository" = local.liqo_chart_repo
    "liqo.chart"      = local.liqo_chart_name
    "liqo.version"    = var.liqo_chart_version

    # Omni agent chart coordinates.
    "omni-agent.repository" = local.castai_helm_repository
    "omni-agent.chart"      = local.omni_agent_chart

    # The same values map that helm_release.omni_agent passes to `values`.
    "values.yaml" = yamlencode(local.helm_yaml_values)
  }
}
80 changes: 31 additions & 49 deletions modules/aks/main.tf
Original file line number Diff line number Diff line change
@@ -1,54 +1,36 @@
locals {
pools_cidrs = ["10.0.0.0/8", "192.168.0.0/16", "172.16.0.0/12", var.service_cidr]
provider = "aks"

basic_set_values = [
{
name = "tag"
value = var.image_tag
},
{
name = "apiServer.address"
value = var.api_server_address
},
{
name = "discovery.config.clusterID"
value = var.cluster_name
},
{
name = "discovery.config.clusterLabels.liqo\\.io/provider"
value = "aks"
},
{
name = "discovery.config.clusterLabels.topology\\.kubernetes\\.io/region"
value = var.cluster_region
},
{
name = "ipam.podCIDR"
value = var.pod_cidr
},
{
name = "ipam.serviceCIDR"
value = var.service_cidr
},
{
name = "ipam.serviceCIDR"
value = var.service_cidr
},
{
name = "telemetry.enabled"
value = "false"
liqo_yaml_values = {
liqo = {
enabled = true
tag = var.image_tag
apiServer = {
address = var.api_server_address
}
discovery = {
config = {
clusterID = var.cluster_name
clusterLabels = merge(
{
"liqo.io/provider" = local.provider
"topology.kubernetes.io/region" = var.cluster_region
},
var.cluster_zone != "" ? {
"topology.kubernetes.io/zone" = var.cluster_zone
} : {}
)
}
}
ipam = {
podCIDR = var.pod_cidr
serviceCIDR = var.service_cidr
pools = local.pools_cidrs
}
telemetry = {
enabled = false
}
}
]

pools_set_values = [
for idx, cidr in local.pools_cidrs : {
name = "ipam.pools[${idx}]"
value = cidr
}
]

all_set_values = concat(
local.basic_set_values,
local.pools_set_values,
)
}
}
6 changes: 3 additions & 3 deletions modules/aks/outputs.tf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
output "set_values" {
description = "All Helm set values for liqo configuration"
value = local.all_set_values
output "liqo_yaml_values" {
description = "Liqo configuration as nested YAML structure"
value = local.liqo_yaml_values
}
Loading
Loading