Skip to content

Commit 81242e5

Browse files
authored
Live AWS with native VPC CNI example (#519)
1 parent 0472890 commit 81242e5

20 files changed

Lines changed: 519 additions & 502 deletions

File tree

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
## EKS and CAST AI example with CAST AI Autoscaler policies and additional Node Configurations
2+
3+
The following example shows how to onboard an EKS cluster to CAST AI (Phase 2) and configure the CAST AI Live Migration capability.
4+
5+
IAM policies required to connect the cluster to CAST AI in the example are created by [castai/eks-role-iam/castai module](https://github.com/castai/terraform-castai-eks-role-iam).
6+
7+
CAST AI modules are placed in one umbrella module that can be reused for different clusters and enabled/disabled.
8+
9+
Example configuration should be analysed in the following order:
10+
1. Create VPC - `vpc.tf`
11+
2. Create EKS cluster - `eks.tf`
12+
3. Create IAM and other CAST AI related resources to connect EKS cluster to CAST AI, configure Autoscaler and Node Configurations - `castai.tf`
13+
14+
# Usage
15+
1. Rename `tf.vars.example` to `tf.vars`
16+
2. Update `tf.vars` file with your cluster name, cluster region and CAST AI API token.
17+
18+
| Variable | Description |
19+
| --- | --- |
20+
| cluster_name = "" | Name of cluster |
21+
| cluster_region = "" | AWS region of the cluster |
22+
| castai_api_token = "" | CAST AI API token |
23+
24+
3. Initialize Terraform. Under example root folder run:
25+
```
26+
terraform init
27+
```
28+
4. Run Terraform apply:
29+
```
30+
terraform apply -var-file=tf.vars
31+
```
32+
5. To destroy resources created by this example:
33+
```
34+
terraform destroy -var-file=tf.vars
35+
```
36+
37+
> **Note**
38+
>
39+
> If you are onboarding an existing cluster to CAST AI, you also need to update the [aws-auth](https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html) configmap. The instance profile
40+
> used by CAST AI has to be present in the configmap. An example entry can be found [here](https://github.com/castai/terraform-provider-castai/blob/157babd57b0977f499eb162e9bee27bee51d292a/examples/eks/eks_cluster_autoscaler_polices/eks.tf#L28-L38).
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module "cluster" {
2+
source = "./module/castai"
3+
count = var.enable_castai ? 1 : 0
4+
5+
cluster_name = var.cluster_name
6+
castai_api_token = var.castai_api_token
7+
cluster_region = var.cluster_region
8+
vpc_id = module.vpc.vpc_id
9+
security_groups = [
10+
module.eks.cluster_security_group_id,
11+
module.eks.node_security_group_id,
12+
aws_security_group.additional.id,
13+
]
14+
subnets = module.vpc.private_subnets
15+
live_proxy_version = var.live_proxy_version
16+
live_helm_version = var.live_helm_version
17+
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# 2. Create EKS cluster.
2+
module "eks" {
3+
source = "terraform-aws-modules/eks/aws"
4+
version = "19.4.2"
5+
putin_khuylo = true
6+
7+
cluster_name = var.cluster_name
8+
cluster_version = var.cluster_version
9+
cluster_endpoint_public_access = true
10+
11+
cluster_addons = {
12+
coredns = {
13+
most_recent = true
14+
}
15+
kube-proxy = {
16+
most_recent = true
17+
}
18+
vpc-cni = {
19+
most_recent = true
20+
}
21+
}
22+
23+
vpc_id = module.vpc.vpc_id
24+
subnet_ids = module.vpc.private_subnets
25+
26+
manage_aws_auth_configmap = true
27+
28+
aws_auth_roles = [
29+
# Add the CAST AI IAM role, which is required for CAST AI nodes to join the cluster.
30+
{
31+
rolearn = module.cluster[0].instance_profile_role_arn
32+
username = "system:node:{{EC2PrivateDNSName}}"
33+
groups = [
34+
"system:bootstrappers",
35+
"system:nodes",
36+
]
37+
},
38+
]
39+
40+
self_managed_node_groups = {
41+
node_group_1 = {
42+
name = "${var.cluster_name}-ng-1"
43+
instance_type = "m5.large"
44+
max_size = 5
45+
min_size = 2
46+
desired_size = 2
47+
}
48+
}
49+
50+
eks_managed_node_groups = {
51+
node_group_spot = {
52+
name = "${var.cluster_name}-spot"
53+
min_size = 1
54+
max_size = 10
55+
desired_size = 1
56+
57+
instance_types = ["t3.large"]
58+
capacity_type = "SPOT"
59+
60+
update_config = {
61+
max_unavailable_percentage = 50 # or set `max_unavailable`
62+
}
63+
}
64+
}
65+
66+
}
67+
68+
# Example additional security group.
69+
resource "aws_security_group" "additional" {
70+
name_prefix = "${var.cluster_name}-additional"
71+
vpc_id = module.vpc.vpc_id
72+
73+
ingress {
74+
from_port = 22
75+
to_port = 22
76+
protocol = "tcp"
77+
cidr_blocks = [
78+
"10.0.0.0/8",
79+
]
80+
}
81+
}
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
locals {
2+
role_name = "castai-eks-role"
3+
4+
default_node_cfg = {
5+
default = {
6+
subnets = var.subnets
7+
tags = var.tags
8+
security_groups = var.security_groups
9+
instance_profile_arn = module.castai-eks-role-iam.instance_profile_arn
10+
}
11+
}
12+
13+
default_node_tmpl = {
14+
default_by_castai = {
15+
name = "default-by-castai"
16+
configuration_id = module.castai-eks-cluster.castai_node_configurations[
17+
"default"
18+
]
19+
is_default = true
20+
is_enabled = true
21+
should_taint = false
22+
23+
constraints = {
24+
on_demand = true
25+
spot = true
26+
use_spot_fallbacks = true
27+
28+
enable_spot_diversity = false
29+
spot_diversity_price_increase_limit_percent = 20
30+
31+
spot_interruption_predictions_enabled = true
32+
spot_interruption_predictions_type = "aws-rebalance-recommendations"
33+
}
34+
}
35+
}
36+
37+
node_configuration = merge(local.default_node_cfg, {
38+
live = {
39+
subnets = var.subnets,
40+
instance_profile_arn = module.castai-eks-role-iam.instance_profile_arn
41+
security_groups = var.security_groups
42+
init_script = base64encode(templatefile("${path.module}/eks-init-script.sh", {
43+
live_proxy_version = trimspace(var.live_proxy_version)
44+
}))
45+
container_runtime = "containerd"
46+
eks_image_family = "al2023"
47+
}
48+
})
49+
50+
node_templates = merge(local.default_node_tmpl, {
51+
live_tmpl = {
52+
configuration_id = module.castai-eks-cluster.castai_node_configurations["live"]
53+
is_enabled = true
54+
should_taint = true
55+
}
56+
})
57+
}
58+
59+
# Configure Data sources and providers required for CAST AI connection.
60+
data "aws_caller_identity" "current" {}
61+
62+
resource "castai_eks_user_arn" "castai_user_arn" {
63+
cluster_id = castai_eks_clusterid.cluster_id.id
64+
}
65+
66+
# Create AWS IAM policies and a user to connect to CAST AI.
67+
module "castai-eks-role-iam" {
68+
source = "castai/eks-role-iam/castai"
69+
70+
aws_account_id = data.aws_caller_identity.current.account_id
71+
aws_cluster_region = var.cluster_region
72+
aws_cluster_name = var.cluster_name
73+
aws_cluster_vpc_id = var.vpc_id
74+
75+
castai_user_arn = castai_eks_user_arn.castai_user_arn.arn
76+
77+
create_iam_resources_per_cluster = true
78+
}
79+
80+
# Configure EKS cluster connection using CAST AI eks-cluster module.
81+
resource "castai_eks_clusterid" "cluster_id" {
82+
account_id = data.aws_caller_identity.current.account_id
83+
region = var.cluster_region
84+
cluster_name = var.cluster_name
85+
}
86+
87+
module "castai-eks-cluster" {
88+
source = "castai/eks-cluster/castai"
89+
90+
api_url = var.castai_api_url
91+
castai_api_token = var.castai_api_token
92+
grpc_url = var.castai_grpc_url
93+
wait_for_cluster_ready = true
94+
95+
aws_account_id = data.aws_caller_identity.current.account_id
96+
aws_cluster_region = var.cluster_region
97+
aws_cluster_name = var.cluster_name
98+
99+
aws_assume_role_arn = module.castai-eks-role-iam.role_arn
100+
delete_nodes_on_disconnect = var.delete_nodes_on_disconnect
101+
102+
default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"]
103+
104+
node_configurations = local.node_configuration
105+
106+
node_templates = local.node_templates
107+
108+
autoscaler_settings = {
109+
enabled = true
110+
is_scoped_mode = false
111+
node_templates_partial_matching_enabled = false
112+
113+
unschedulable_pods = {
114+
enabled = true
115+
}
116+
117+
node_downscaler = {
118+
enabled = true
119+
120+
empty_nodes = {
121+
enabled = true
122+
}
123+
124+
evictor = {
125+
aggressive_mode = false
126+
cycle_interval = "5m10s"
127+
dry_run = false
128+
enabled = true
129+
node_grace_period_minutes = 10
130+
scoped_mode = false
131+
}
132+
}
133+
134+
cluster_limits = {
135+
enabled = true
136+
137+
cpu = {
138+
max_cores = 20
139+
min_cores = 1
140+
}
141+
}
142+
}
143+
144+
# depends_on helps Terraform build a proper dependency graph for resource creation and, in this case, destruction.
145+
# module "castai-eks-cluster" has to be destroyed before module "castai-eks-role-iam".
146+
depends_on = [module.castai-eks-role-iam]
147+
}
148+
149+
resource "helm_release" "live-helm" {
150+
name = "castai-live"
151+
152+
repository = "https://castai.github.io/helm-charts"
153+
chart = "castai-live"
154+
version = var.live_helm_version
155+
156+
namespace = "castai-live"
157+
create_namespace = true
158+
dependency_update = true
159+
160+
set {
161+
name = "castai-aws-vpc-cni.enabled"
162+
value = "true"
163+
}
164+
165+
set {
166+
name = "castai.clusterID"
167+
value = castai_eks_clusterid.cluster_id.id
168+
}
169+
170+
set {
171+
name = "castai.apiKey"
172+
value = var.castai_api_token
173+
}
174+
175+
wait = false
176+
177+
depends_on = [module.castai-eks-cluster]
178+
}

examples/eks/eks_live_migration/eks-init-script.sh renamed to examples/eks/eks_cluster_live_migration/module/castai/eks-init-script.sh

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,31 +14,31 @@ case "$ARCH" in
1414
;;
1515
esac
1616

17-
CRI_URL=https://storage.googleapis.com/castai-node-components/castai-cri-proxy/releases/0.27.0
17+
CRI_URL=https://storage.googleapis.com/castai-node-components/castai-cri-proxy/releases/${live_proxy_version}
1818

19-
wget ${CRI_URL}/castai-cri-proxy-linux-${ARCH}.tar.gz -O /var/tmp/castai-cri-proxy-linux-${ARCH}.tar.gz
20-
wget ${CRI_URL}/castai-cri-proxy_SHA256SUMS -O /var/tmp/proxy_SHA256SUMS
19+
wget $CRI_URL/castai-cri-proxy-linux-$ARCH.tar.gz -O /var/tmp/castai-cri-proxy-linux-$ARCH.tar.gz
20+
wget $CRI_URL/castai-cri-proxy_SHA256SUMS -O /var/tmp/proxy_SHA256SUMS
2121
SHA256_AMD64_FROM_FILE=$(head -n 1 /var/tmp/proxy_SHA256SUMS | awk '{print $1}')
2222
SHA256_ARM64_FROM_FILE=$(sed -n '2p' /var/tmp/proxy_SHA256SUMS | awk '{print $1}')
2323
pushd /var/tmp
2424
sha256sum --ignore-missing --check /var/tmp/proxy_SHA256SUMS
2525
popd
26-
tar -xvzf /var/tmp/castai-cri-proxy-linux-${ARCH}.tar.gz -C /var/tmp/ cri-proxy
26+
tar -xvzf /var/tmp/castai-cri-proxy-linux-$ARCH.tar.gz -C /var/tmp/ cri-proxy
2727
chmod +x /var/tmp/cri-proxy
2828

2929
cat <<EOF >/var/tmp/pre-install.yaml
3030
packages:
3131
cri-proxy:
32-
downloadURL: ${CRI_URL}
32+
downloadURL: $CRI_URL
3333
unpackDir: /usr/local/bin
3434
customUnpackLocations:
3535
cni-proxy: /opt/cni/bin/
3636
arch:
3737
amd64:
3838
fileName: castai-cri-proxy-linux-amd64.tar.gz
39-
sha256sum: ${SHA256_AMD64_FROM_FILE}
39+
sha256sum: $SHA256_AMD64_FROM_FILE
4040
arm64:
4141
fileName: castai-cri-proxy-linux-arm64.tar.gz
42-
sha256sum: ${SHA256_ARM64_FROM_FILE}
42+
sha256sum: $SHA256_ARM64_FROM_FILE
4343
EOF
44-
sudo /var/tmp/cri-proxy install --base-config=amazon-linux-2023 --config /var/tmp/pre-install.yaml --debug
44+
sudo /var/tmp/cri-proxy install --base-config=amazon-linux-2023 --config /var/tmp/pre-install.yaml --debug
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
output "instance_profile_role_arn" {
2+
value = module.castai-eks-role-iam.instance_profile_role_arn
3+
}

0 commit comments

Comments
 (0)