diff --git a/castai/resource_aks_cluster.go b/castai/resource_aks_cluster.go index 14e644781..e0301933b 100644 --- a/castai/resource_aks_cluster.go +++ b/castai/resource_aks_cluster.go @@ -40,7 +40,7 @@ func resourceAKSCluster() *schema.Resource { Timeouts: &schema.ResourceTimeout{ Create: schema.DefaultTimeout(5 * time.Minute), Update: schema.DefaultTimeout(1 * time.Minute), - Delete: schema.DefaultTimeout(6 * time.Minute), + Delete: schema.DefaultTimeout(15 * time.Minute), }, Schema: map[string]*schema.Schema{ diff --git a/castai/resource_eks_cluster.go b/castai/resource_eks_cluster.go index 244fb9ebc..5b0429426 100644 --- a/castai/resource_eks_cluster.go +++ b/castai/resource_eks_cluster.go @@ -32,7 +32,7 @@ func resourceEKSCluster() *schema.Resource { Timeouts: &schema.ResourceTimeout{ Create: schema.DefaultTimeout(5 * time.Minute), Update: schema.DefaultTimeout(1 * time.Minute), - Delete: schema.DefaultTimeout(6 * time.Minute), + Delete: schema.DefaultTimeout(15 * time.Minute), }, Schema: map[string]*schema.Schema{ diff --git a/castai/resource_gke_cluster.go b/castai/resource_gke_cluster.go index a620bd670..10cc74d0f 100644 --- a/castai/resource_gke_cluster.go +++ b/castai/resource_gke_cluster.go @@ -33,7 +33,7 @@ func resourceGKECluster() *schema.Resource { Timeouts: &schema.ResourceTimeout{ Create: schema.DefaultTimeout(5 * time.Minute), Update: schema.DefaultTimeout(1 * time.Minute), - Delete: schema.DefaultTimeout(6 * time.Minute), // Cluster action timeout is 5 minutes. 
+ Delete: schema.DefaultTimeout(15 * time.Minute), }, Schema: map[string]*schema.Schema{ diff --git a/castai/sdk/api.gen.go b/castai/sdk/api.gen.go index fe903942c..abfeb4558 100644 --- a/castai/sdk/api.gen.go +++ b/castai/sdk/api.gen.go @@ -355,6 +355,7 @@ const ( WorkloadoptimizationV1EventTypeEVENTTYPECONFIGURATIONCHANGEDV2 WorkloadoptimizationV1EventType = "EVENT_TYPE_CONFIGURATION_CHANGEDV2" WorkloadoptimizationV1EventTypeEVENTTYPEFAILEDHELMTESTHOOK WorkloadoptimizationV1EventType = "EVENT_TYPE_FAILED_HELM_TEST_HOOK" WorkloadoptimizationV1EventTypeEVENTTYPEINVALID WorkloadoptimizationV1EventType = "EVENT_TYPE_INVALID" + WorkloadoptimizationV1EventTypeEVENTTYPEMEMORYPRESSUREEVICTION WorkloadoptimizationV1EventType = "EVENT_TYPE_MEMORY_PRESSURE_EVICTION" WorkloadoptimizationV1EventTypeEVENTTYPEOOMKILL WorkloadoptimizationV1EventType = "EVENT_TYPE_OOM_KILL" WorkloadoptimizationV1EventTypeEVENTTYPERECOMMENDEDPODCOUNTCHANGED WorkloadoptimizationV1EventType = "EVENT_TYPE_RECOMMENDED_POD_COUNT_CHANGED" WorkloadoptimizationV1EventTypeEVENTTYPERECOMMENDEDREQUESTSCHANGED WorkloadoptimizationV1EventType = "EVENT_TYPE_RECOMMENDED_REQUESTS_CHANGED" @@ -477,6 +478,7 @@ const ( WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPECONFIGURATIONCHANGEDV2 WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_CONFIGURATION_CHANGEDV2" WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPEFAILEDHELMTESTHOOK WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_FAILED_HELM_TEST_HOOK" WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPEINVALID WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_INVALID" + WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPEMEMORYPRESSUREEVICTION WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_MEMORY_PRESSURE_EVICTION" WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPEOOMKILL WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_OOM_KILL" 
WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPERECOMMENDEDPODCOUNTCHANGED WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_RECOMMENDED_POD_COUNT_CHANGED" WorkloadOptimizationAPIListWorkloadEventsParamsTypeEVENTTYPERECOMMENDEDREQUESTSCHANGED WorkloadOptimizationAPIListWorkloadEventsParamsType = "EVENT_TYPE_RECOMMENDED_REQUESTS_CHANGED" @@ -4524,6 +4526,7 @@ type WorkloadoptimizationV1DownscalingSettings struct { type WorkloadoptimizationV1Event struct { ConfigurationChangedV2 *WorkloadoptimizationV1ConfigurationChangedEventV2 `json:"configurationChangedV2,omitempty"` FailedHook *WorkloadoptimizationV1FailedHookEvent `json:"failedHook,omitempty"` + MemoryPressureEviction *WorkloadoptimizationV1MemoryPressureEvictionEvent `json:"memoryPressureEviction,omitempty"` OomKill *WorkloadoptimizationV1OOMKillEvent `json:"oomKill,omitempty"` RecommendedPodCountChanged *WorkloadoptimizationV1RecommendedPodCountChangedEvent `json:"recommendedPodCountChanged,omitempty"` RecommendedRequestsChanged *WorkloadoptimizationV1RecommendedRequestsChangedEvent `json:"recommendedRequestsChanged,omitempty"` @@ -4799,6 +4802,17 @@ type WorkloadoptimizationV1MemoryEventSettings struct { ApplyType *WorkloadoptimizationV1ApplyType `json:"applyType,omitempty"` } +// WorkloadoptimizationV1MemoryPressureEvictionEvent defines model for workloadoptimization.v1.MemoryPressureEvictionEvent. +type WorkloadoptimizationV1MemoryPressureEvictionEvent struct { + Containers *[]WorkloadoptimizationV1MemoryPressureEvictionEventContainer `json:"containers,omitempty"` +} + +// WorkloadoptimizationV1MemoryPressureEvictionEventContainer defines model for workloadoptimization.v1.MemoryPressureEvictionEvent.Container. 
+type WorkloadoptimizationV1MemoryPressureEvictionEventContainer struct { + MemoryUsageGib *float64 `json:"memoryUsageGib"` + Name *string `json:"name,omitempty"` +} + // WorkloadoptimizationV1NewWorkloadScalingPolicy defines model for workloadoptimization.v1.NewWorkloadScalingPolicy. type WorkloadoptimizationV1NewWorkloadScalingPolicy struct { ApplyType WorkloadoptimizationV1ApplyType `json:"applyType"` @@ -5296,7 +5310,6 @@ type WorkloadoptimizationV1WorkloadEvent struct { // Type EventType defines possible types for workload events. Type WorkloadoptimizationV1EventType `json:"type"` - Workload *WorkloadoptimizationV1WorkloadEventWorkload `json:"workload,omitempty"` Workloads *[]WorkloadoptimizationV1WorkloadEventWorkload `json:"workloads,omitempty"` } @@ -5537,7 +5550,14 @@ type RbacServiceAPIListRoleBindingsParams struct { // PageCursor Cursor that defines token indicating where to start the next page. // Empty value indicates to start from beginning of the dataset. PageCursor *string `form:"page.cursor,omitempty" json:"page.cursor,omitempty"` - RoleId *string `form:"roleId,omitempty" json:"roleId,omitempty"` + + // RoleId Filter by role ID. Multiple values can be passed as query parameters (e.g., + // &role_id=x&role_id=y) + RoleId *[]string `form:"roleId,omitempty" json:"roleId,omitempty"` + + // GroupId Filter by group ID. Multiple values can be passed as query parameters + // (e.g., &group_id=x&group_id=y) + GroupId *[]string `form:"groupId,omitempty" json:"groupId,omitempty"` } // RbacServiceAPICreateRoleBindingsJSONBody defines parameters for RbacServiceAPICreateRoleBindings. 
@@ -5606,6 +5626,12 @@ type CommitmentsAPIGetCommitmentsParams struct { // ClusterId get commitments that are assigned to a cluster ClusterId *string `form:"clusterId,omitempty" json:"clusterId,omitempty"` + + // IncludeUsagePerClusters indicates if usage per clusters should be included + IncludeUsagePerClusters *bool `form:"includeUsagePerClusters,omitempty" json:"includeUsagePerClusters,omitempty"` + + // IncludeUsagePerInstanceTypes indicates if usage per instance types should be included + IncludeUsagePerInstanceTypes *bool `form:"includeUsagePerInstanceTypes,omitempty" json:"includeUsagePerInstanceTypes,omitempty"` } // CommitmentsAPIImportAzureReservationsJSONBody defines parameters for CommitmentsAPIImportAzureReservations. @@ -5649,6 +5675,12 @@ type CommitmentsAPIGetCommitmentParams struct { // IncludeUsage indicated if commitment usage should be included in a response IncludeUsage *bool `form:"includeUsage,omitempty" json:"includeUsage,omitempty"` + + // IncludeUsagePerClusters indicates if usage per clusters should be included + IncludeUsagePerClusters *bool `form:"includeUsagePerClusters,omitempty" json:"includeUsagePerClusters,omitempty"` + + // IncludeUsagePerInstanceTypes indicates if usage per instance types should be included + IncludeUsagePerInstanceTypes *bool `form:"includeUsagePerInstanceTypes,omitempty" json:"includeUsagePerInstanceTypes,omitempty"` } // CommitmentsAPIReplaceCommitmentAssignmentsJSONBody defines parameters for CommitmentsAPIReplaceCommitmentAssignments. 
diff --git a/castai/sdk/client.gen.go b/castai/sdk/client.gen.go index ccc3d6f77..ebcd1e136 100644 --- a/castai/sdk/client.gen.go +++ b/castai/sdk/client.gen.go @@ -7482,6 +7482,22 @@ func NewRbacServiceAPIListRoleBindingsRequest(server string, organizationId stri } + if params.GroupId != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "groupId", runtime.ParamLocationQuery, *params.GroupId); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + queryURL.RawQuery = queryValues.Encode() } @@ -8991,6 +9007,38 @@ func NewCommitmentsAPIGetCommitmentsRequest(server string, params *CommitmentsAP } + if params.IncludeUsagePerClusters != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "includeUsagePerClusters", runtime.ParamLocationQuery, *params.IncludeUsagePerClusters); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + if params.IncludeUsagePerInstanceTypes != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "includeUsagePerInstanceTypes", runtime.ParamLocationQuery, *params.IncludeUsagePerInstanceTypes); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + queryURL.RawQuery = queryValues.Encode() } @@ -9270,6 +9318,38 @@ func NewCommitmentsAPIGetCommitmentRequest(server string, commitmentId string, p } + if params.IncludeUsagePerClusters != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "includeUsagePerClusters", runtime.ParamLocationQuery, 
*params.IncludeUsagePerClusters); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + + if params.IncludeUsagePerInstanceTypes != nil { + + if queryFrag, err := runtime.StyleParamWithLocation("form", true, "includeUsagePerInstanceTypes", runtime.ParamLocationQuery, *params.IncludeUsagePerInstanceTypes); err != nil { + return nil, err + } else if parsed, err := url.ParseQuery(queryFrag); err != nil { + return nil, err + } else { + for k, v := range parsed { + for _, v2 := range v { + queryValues.Add(k, v2) + } + } + } + + } + queryURL.RawQuery = queryValues.Encode() } diff --git a/examples/aks/aks_cluster_with_security/README.MD b/examples/aks/aks_cluster_with_security/README.MD new file mode 100644 index 000000000..a8c9b1041 --- /dev/null +++ b/examples/aks/aks_cluster_with_security/README.MD @@ -0,0 +1,27 @@ +# Example of AKS cluster connected to CAST AI with enabled Kvisor security agent +Following example creates AKS cluster and its supporting resources.\ +After AKS cluster is created it is onboarded to CAST AI.\ +[Kvisor security agent](https://docs.cast.ai/docs/kvisor) is deployed to the cluster and security policies are enabled.\ +See `install_security_agent` and `kvisor_values` variables in `castai.tf` file.\ +Example configuration should be analysed in the following order: +1. Create Virtual network - `vnet.tf` +2. Create AKS cluster - `aks.tf` +3. Create CAST AI related resources to connect AKS cluster to CAST AI - `castai.tf` + +# Usage +1. Rename `tf.vars.example` to `tf.vars` +2. Update `tf.vars` file with your cluster name, cluster region and CAST AI API token. +3. Initialize Terraform. Under example root folder run: +``` +terraform init +``` +4. Run Terraform apply: +``` +terraform apply -var-file=tf.vars +``` +5. 
To destroy resources created by this example: +``` +terraform destroy -var-file=tf.vars +``` + +Please refer to this guide if you run into any issues https://docs.cast.ai/docs/terraform-troubleshooting diff --git a/examples/aks/aks_cluster_with_security/aks.tf b/examples/aks/aks_cluster_with_security/aks.tf new file mode 100644 index 000000000..d8390d5ce --- /dev/null +++ b/examples/aks/aks_cluster_with_security/aks.tf @@ -0,0 +1,25 @@ +# 2. Create AKS cluster. + +resource "azurerm_kubernetes_cluster" "this" { + name = var.cluster_name + resource_group_name = azurerm_resource_group.this.name + location = azurerm_resource_group.this.location + dns_prefix = var.cluster_name + node_resource_group = "${var.cluster_name}-ng" + + default_node_pool { + name = "default" + # Node count has to be at least 2 to successfully deploy CAST AI controller. + node_count = 2 + vm_size = "Standard_D2_v2" + vnet_subnet_id = azurerm_subnet.internal.id + } + + identity { + type = "SystemAssigned" + } + + tags = { + Environment = "Test" + } +} diff --git a/examples/aks/aks_cluster_with_security/castai.tf b/examples/aks/aks_cluster_with_security/castai.tf new file mode 100644 index 000000000..983dd7205 --- /dev/null +++ b/examples/aks/aks_cluster_with_security/castai.tf @@ -0,0 +1,87 @@ +# 3. Connect AKS cluster to CAST AI with enabled Kvisor security agent. + +# Configure Data sources and providers required for CAST AI connection. +data "azurerm_subscription" "current" {} + +# Configure AKS cluster connection to CAST AI using CAST AI aks-cluster module with enabled Kvisor security agent. +module "castai-aks-cluster" { + source = "castai/aks/castai" + + kvisor_grpc_addr = var.kvisor_grpc_addr + + # Kvisor is an open-source security agent from CAST AI. 
+ # install_security_agent by default installs Kvisor controller (k8s: deployment) + # https://docs.cast.ai/docs/kvisor + install_security_agent = true + + # Kvisor configuration examples, enable certain features: + kvisor_values = [ + yamlencode({ + controller = { + extraArgs = { + # UI: Vulnerability management configuration = API: IMAGE_SCANNING + "image-scan-enabled" = true + # UI: Compliance configuration = API: CONFIGURATION_SCANNING + "kube-bench-enabled" = true + "kube-linter-enabled" = true + } + } + + # UI: Runtime Security = API: RUNTIME_SECURITY + agent = { + # In order to enable Runtime security set agent.enabled to true. + # This will install Kvisor agent (k8s: daemonset) + # https://docs.cast.ai/docs/sec-runtime-security + "enabled" = true + + extraArgs = { + # Runtime security configuration examples: + # By default, most users enable the eBPF events and file hash enricher. + # For all flag explanations and code, see: https://github.com/castai/kvisor/blob/main/cmd/agent/daemon/daemon.go + "ebpf-events-enabled" = true + "file-hash-enricher-enabled" = true + # other examples + "netflow-enabled" = false + "netflow-export-interval" = "30s" + "ebpf-program-metrics-enabled" = false + "prom-metrics-export-enabled" = false + "prom-metrics-export-interval" = "30s" + "process-tree-enabled" = false + } + } + }) + ] + + # Deprecated, leave this empty, to prevent setting defaults. + kvisor_controller_extra_args = {} + + # Everything else... 
+ + wait_for_cluster_ready = false + + install_workload_autoscaler = false + install_pod_mutator = false + delete_nodes_on_disconnect = var.delete_nodes_on_disconnect + + api_url = var.castai_api_url + castai_api_token = var.castai_api_token + grpc_url = var.castai_grpc_url + + aks_cluster_name = var.cluster_name + aks_cluster_region = var.cluster_region + node_resource_group = azurerm_kubernetes_cluster.this.node_resource_group + resource_group = azurerm_kubernetes_cluster.this.resource_group_name + + subscription_id = data.azurerm_subscription.current.subscription_id + tenant_id = data.azurerm_subscription.current.tenant_id + + default_node_configuration = module.castai-aks-cluster.castai_node_configurations["default"] + + node_configurations = { + default = { + disk_cpu_ratio = 25 + subnets = [azurerm_subnet.internal.id] + tags = var.tags + } + } +} diff --git a/examples/aks/aks_cluster_with_security/providers.tf b/examples/aks/aks_cluster_with_security/providers.tf new file mode 100644 index 000000000..02739ff98 --- /dev/null +++ b/examples/aks/aks_cluster_with_security/providers.tf @@ -0,0 +1,23 @@ +# Following providers required by AKS and Vnet resources. 
+provider "azurerm" { + features {} + subscription_id = var.subscription_id +} + +provider "castai" { + api_token = var.castai_api_token + api_url = var.castai_api_url +} + +provider "azuread" { + tenant_id = data.azurerm_subscription.current.tenant_id +} + +provider "helm" { + kubernetes { + host = azurerm_kubernetes_cluster.this.kube_config.0.host + client_certificate = base64decode(azurerm_kubernetes_cluster.this.kube_config.0.client_certificate) + client_key = base64decode(azurerm_kubernetes_cluster.this.kube_config.0.client_key) + cluster_ca_certificate = base64decode(azurerm_kubernetes_cluster.this.kube_config.0.cluster_ca_certificate) + } +} diff --git a/examples/aks/aks_cluster_with_security/tf.vars.example b/examples/aks/aks_cluster_with_security/tf.vars.example new file mode 100644 index 000000000..35261bfb1 --- /dev/null +++ b/examples/aks/aks_cluster_with_security/tf.vars.example @@ -0,0 +1,4 @@ +cluster_name = "" +cluster_region = "" +castai_api_token = "" +subscription_id = "" diff --git a/examples/aks/aks_cluster_with_security/variables.tf b/examples/aks/aks_cluster_with_security/variables.tf new file mode 100644 index 000000000..3cde5693f --- /dev/null +++ b/examples/aks/aks_cluster_with_security/variables.tf @@ -0,0 +1,50 @@ +# AKS cluster variables. +variable "cluster_name" { + type = string + description = "Name of the AKS cluster, resources will be created for." +} + +variable "cluster_region" { + type = string + description = "Region of the AKS cluster, resources will be created for." 
+} + +variable "castai_api_url" { + type = string + description = "URL of alternative CAST AI API to be used during development or testing" + default = "https://api.cast.ai" +} + +variable "castai_api_token" { + type = string + description = "CAST AI API token created in console.cast.ai API Access keys section" +} + +variable "castai_grpc_url" { + type = string + description = "CAST AI gRPC URL used by pod pinner" + default = "grpc.cast.ai:443" +} + +variable "kvisor_grpc_addr" { + type = string + description = "CAST AI Kvisor optimized GRPC API address" + default = "kvisor.prod-master.cast.ai:443" // If your cluster is in the EU region, update the grpcAddr to: https://kvisor.prod-eu.cast.ai:443 +} + +variable "delete_nodes_on_disconnect" { + type = bool + description = "Optional parameter, if set to true - CAST AI provisioned nodes will be deleted from cloud on cluster disconnection. For production use it is recommended to set it to false." + default = true +} + +variable "tags" { + type = map(any) + description = "Optional tags for new cluster nodes. This parameter applies only to new nodes - tags for old nodes are not reconciled." + default = {} +} + +variable "subscription_id" { + type = string + description = "Azure subscription ID" +} diff --git a/examples/aks/aks_cluster_with_security/versions.tf b/examples/aks/aks_cluster_with_security/versions.tf new file mode 100644 index 000000000..9c3f0a05a --- /dev/null +++ b/examples/aks/aks_cluster_with_security/versions.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + azurerm = { + source = "hashicorp/azurerm" + } + azuread = { + source = "hashicorp/azuread" + } + castai = { + source = "castai/castai" + } + } + required_version = ">= 0.13" +} diff --git a/examples/aks/aks_cluster_with_security/vnet.tf b/examples/aks/aks_cluster_with_security/vnet.tf new file mode 100644 index 000000000..77677a71f --- /dev/null +++ b/examples/aks/aks_cluster_with_security/vnet.tf @@ -0,0 +1,20 @@ +# 1. 
Create virtual network and resource group for the cluster. + +resource "azurerm_resource_group" "this" { + name = var.cluster_name + location = var.cluster_region +} + +resource "azurerm_virtual_network" "this" { + name = "${var.cluster_name}-network" + location = azurerm_resource_group.this.location + resource_group_name = azurerm_resource_group.this.name + address_space = ["10.1.0.0/16"] +} + +resource "azurerm_subnet" "internal" { + name = "internal" + virtual_network_name = azurerm_virtual_network.this.name + resource_group_name = azurerm_resource_group.this.name + address_prefixes = ["10.1.0.0/22"] +} diff --git a/examples/eks/eks_cluster_with_security/README.MD b/examples/eks/eks_cluster_with_security/README.MD new file mode 100644 index 000000000..f6c8dec10 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/README.MD @@ -0,0 +1,34 @@ +# Example of EKS cluster connected to CAST AI with enabled Kvisor security agent +The following example creates an EKS cluster and its supporting resources using AWS community modules.\ +After EKS cluster is created it is onboarded to CAST AI.\ +[Kvisor security agent](https://docs.cast.ai/docs/kvisor) is deployed to the cluster and security policies are enabled.\ +See `install_security_agent` and `kvisor_values` variables in `castai.tf` file.\ +Example configuration should be analysed in the following order: +1. Create VPC - `vpc.tf` +2. Create EKS cluster - `eks.tf` +3. Create CAST AI related resources to connect EKS cluster to CAST AI in read-only mode - `castai.tf` + +# Usage +1. Rename `tf.vars.example` to `tf.vars` +2. Update `tf.vars` file with your cluster name, cluster region and CAST AI API token + +| Variable | Description | +| --- | --- | +| cluster_name = "" | Name of cluster | +| cluster_region = "" | Name of region of cluster | +| castai_api_token = "" | CAST AI API token | + +3. Initialize Terraform. Under example root folder run: +``` +terraform init +``` +4. 
Run Terraform apply: +``` +terraform apply -var-file=tf.vars +``` +5. To destroy resources created by this example: +``` +terraform destroy -var-file=tf.vars +``` + +Please refer to this guide if you run into any issues https://docs.cast.ai/docs/terraform-troubleshooting diff --git a/examples/eks/eks_cluster_with_security/castai.tf b/examples/eks/eks_cluster_with_security/castai.tf new file mode 100644 index 000000000..fc45d9fd7 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/castai.tf @@ -0,0 +1,140 @@ +# 3. Connect EKS cluster to CAST AI with enabled Kvisor security agent. + +# Configure Data sources and providers required for CAST AI connection. +data "aws_caller_identity" "current" {} + +# Configure EKS cluster connection using CAST AI eks-cluster module. +resource "castai_eks_clusterid" "cluster_id" { + account_id = data.aws_caller_identity.current.account_id + region = var.cluster_region + cluster_name = var.cluster_name +} + +resource "castai_eks_user_arn" "castai_user_arn" { + cluster_id = castai_eks_clusterid.cluster_id.id +} + +# Create AWS IAM policies and a user to connect to CAST AI. +module "castai-eks-role-iam" { + source = "castai/eks-role-iam/castai" + + aws_account_id = data.aws_caller_identity.current.account_id + aws_cluster_region = var.cluster_region + aws_cluster_name = var.cluster_name + aws_cluster_vpc_id = module.vpc.vpc_id + + castai_user_arn = castai_eks_user_arn.castai_user_arn.arn + + create_iam_resources_per_cluster = true +} + +# Install CAST AI with enabled Kvisor security agent. +module "castai-eks-cluster" { + source = "castai/eks-cluster/castai" + + kvisor_grpc_addr = var.kvisor_grpc_addr + + # Kvisor is an open-source security agent from CAST AI. 
+ # install_security_agent by default installs Kvisor controller (k8s: deployment) + # https://docs.cast.ai/docs/kvisor + install_security_agent = true + + # Kvisor configuration examples, enable certain features: + kvisor_values = [ + yamlencode({ + controller = { + extraArgs = { + # UI: Vulnerability management configuration = API: IMAGE_SCANNING + "image-scan-enabled" = true + # UI: Compliance configuration = API: CONFIGURATION_SCANNING + "kube-bench-enabled" = true + "kube-linter-enabled" = true + } + } + + # UI: Runtime Security = API: RUNTIME_SECURITY + agent = { + # In order to enable Runtime security set agent.enabled to true. + # This will install Kvisor agent (k8s: daemonset) + # https://docs.cast.ai/docs/sec-runtime-security + "enabled" = true + + extraArgs = { + # Runtime security configuration examples: + # By default, most users enable the eBPF events and file hash enricher. + # For all flag explanations and code, see: https://github.com/castai/kvisor/blob/main/cmd/agent/daemon/daemon.go + "ebpf-events-enabled" = true + "file-hash-enricher-enabled" = true + # other examples + "netflow-enabled" = false + "netflow-export-interval" = "30s" + "ebpf-program-metrics-enabled" = false + "prom-metrics-export-enabled" = false + "prom-metrics-export-interval" = "30s" + "process-tree-enabled" = false + } + } + }) + ] + + # Deprecated, leave this empty, to prevent setting defaults. + kvisor_controller_extra_args = {} + + # Everything else... 
+ + wait_for_cluster_ready = false + + install_egressd = false + install_workload_autoscaler = false + install_pod_mutator = false + delete_nodes_on_disconnect = false + + api_url = var.castai_api_url + castai_api_token = var.castai_api_token + grpc_url = var.castai_grpc_url + + aws_account_id = data.aws_caller_identity.current.account_id + aws_cluster_region = var.cluster_region + aws_cluster_name = var.cluster_name + + aws_assume_role_arn = module.castai-eks-role-iam.role_arn + + default_node_configuration = module.castai-eks-cluster.castai_node_configurations["default"] + node_configurations = { + default = { + subnets = module.vpc.private_subnets + tags = {} + security_groups = [ + module.eks.cluster_security_group_id, + module.eks.node_security_group_id, + ] + instance_profile_arn = module.castai-eks-role-iam.instance_profile_arn + } + } + + node_templates = { + default_by_castai = { + name = "default-by-castai" + configuration_id = module.castai-eks-cluster.castai_node_configurations["default"] + is_default = true + is_enabled = true + should_taint = false + + constraints = { + on_demand = true + spot = false + use_spot_fallbacks = false + + enable_spot_diversity = false + spot_diversity_price_increase_limit_percent = 20 + + spot_interruption_predictions_enabled = false + spot_interruption_predictions_type = "aws-rebalance-recommendations" + } + } + } + + # module "castai-eks-cluster" has to be destroyed before module "castai-eks-role-iam". + depends_on = [module.castai-eks-role-iam, module.eks, module.vpc] +} + diff --git a/examples/eks/eks_cluster_with_security/eks.tf b/examples/eks/eks_cluster_with_security/eks.tf new file mode 100644 index 000000000..fa3646b6c --- /dev/null +++ b/examples/eks/eks_cluster_with_security/eks.tf @@ -0,0 +1,49 @@ +# 2. Create EKS cluster. 
+ +module "eks" { + source = "terraform-aws-modules/eks/aws" + version = "19.4.2" + putin_khuylo = true + + cluster_name = var.cluster_name + cluster_version = var.cluster_version + cluster_endpoint_public_access = true + + cluster_addons = { + coredns = { + most_recent = true + } + kube-proxy = { + most_recent = true + } + vpc-cni = { + most_recent = true + } + } + + vpc_id = module.vpc.vpc_id + subnet_ids = module.vpc.private_subnets + + eks_managed_node_groups = { + node_group_1 = { + name = "${var.cluster_name}-ng-1" + instance_types = ["m5.large", "m5.xlarge", "t3.large"] + desired_size = 2 + } + } + + manage_aws_auth_configmap = true + + aws_auth_roles = [ + # Add the CAST AI IAM role which is required for CAST AI nodes to join the cluster. + { + rolearn = module.castai-eks-role-iam.instance_profile_role_arn + username = "system:node:{{EC2PrivateDNSName}}" + groups = [ + "system:bootstrappers", + "system:nodes", + ] + } + ] + +} diff --git a/examples/eks/eks_cluster_with_security/providers.tf b/examples/eks/eks_cluster_with_security/providers.tf new file mode 100644 index 000000000..9c655df43 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/providers.tf @@ -0,0 +1,34 @@ +# Following providers required by EKS and VPC modules. 
+provider "aws" { + region = var.cluster_region + profile = var.profile +} + +provider "castai" { + api_url = var.castai_api_url + api_token = var.castai_api_token +} + +provider "kubernetes" { + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + # This requires the awscli to be installed locally where Terraform is executed + args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name, "--region", var.cluster_region] + } +} + +provider "helm" { + kubernetes { + host = module.eks.cluster_endpoint + cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data) + exec { + api_version = "client.authentication.k8s.io/v1beta1" + command = "aws" + # This requires the awscli to be installed locally where Terraform is executed. + args = ["eks", "get-token", "--cluster-name", var.cluster_name, "--region", var.cluster_region, "--profile", var.profile] + } + } +} diff --git a/examples/eks/eks_cluster_with_security/tf.vars.example b/examples/eks/eks_cluster_with_security/tf.vars.example new file mode 100644 index 000000000..80d887c7a --- /dev/null +++ b/examples/eks/eks_cluster_with_security/tf.vars.example @@ -0,0 +1,4 @@ +cluster_name = "" +cluster_region = "" +castai_api_token = "" +profile = "" \ No newline at end of file diff --git a/examples/eks/eks_cluster_with_security/variables.tf b/examples/eks/eks_cluster_with_security/variables.tf new file mode 100644 index 000000000..cd7485768 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/variables.tf @@ -0,0 +1,46 @@ +# EKS module variables. +variable "cluster_name" { + type = string + description = "EKS cluster name in AWS account." +} + +variable "cluster_region" { + type = string + description = "AWS Region in which EKS cluster and supporting resources will be created." 
+} + +variable "cluster_version" { + type = string + description = "EKS cluster version." + default = "1.28" +} + +variable "castai_api_token" { + type = string + description = "CAST AI API token created in console.cast.ai API Access keys section" +} + +variable "castai_api_url" { + type = string + description = "CAST AI url to API, default value is https://api.cast.ai" + default = "https://api.cast.ai" +} + +variable "castai_grpc_url" { + type = string + description = "CAST AI gRPC URL used by pod pinner" + default = "grpc.cast.ai:443" +} + +variable "kvisor_grpc_addr" { + type = string + description = "CAST AI Kvisor optimized GRPC API address" + default = "kvisor.prod-master.cast.ai:443" // If your cluster is in the EU region, update the grpcAddr to: https://kvisor.prod-eu.cast.ai:443 +} + +# EKS module variables. +variable "profile" { + type = string + description = "Profile used with AWS CLI" + default = "default" +} \ No newline at end of file diff --git a/examples/eks/eks_cluster_with_security/versions.tf b/examples/eks/eks_cluster_with_security/versions.tf new file mode 100644 index 000000000..70ee44992 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/versions.tf @@ -0,0 +1,17 @@ +terraform { + required_providers { + castai = { + source = "castai/castai" + } + kubernetes = { + source = "hashicorp/kubernetes" + } + helm = { + source = "hashicorp/helm" + } + aws = { + source = "hashicorp/aws" + } + } + required_version = ">= 0.13" +} diff --git a/examples/eks/eks_cluster_with_security/vpc.tf b/examples/eks/eks_cluster_with_security/vpc.tf new file mode 100644 index 000000000..e799c9390 --- /dev/null +++ b/examples/eks/eks_cluster_with_security/vpc.tf @@ -0,0 +1,31 @@ +#1. Create VPC. 
+data "aws_availability_zones" "available" {} + +module "vpc" { + source = "terraform-aws-modules/vpc/aws" + version = "5.0.0" + + name = var.cluster_name + cidr = "10.0.0.0/16" + + azs = data.aws_availability_zones.available.names + private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"] + public_subnets = ["10.0.4.0/24", "10.0.5.0/24", "10.0.6.0/24"] + + enable_nat_gateway = true + single_nat_gateway = true + + tags = { + "kubernetes.io/cluster/${var.cluster_name}" = "shared" + } + + public_subnet_tags = { + "kubernetes.io/cluster/${var.cluster_name}" = "shared" + "kubernetes.io/role/elb" = 1 + } + + private_subnet_tags = { + "kubernetes.io/cluster/${var.cluster_name}" = "shared" + "kubernetes.io/role/internal-elb" = 1 + } +} diff --git a/examples/gke/gke_cluster_with_security/README.MD b/examples/gke/gke_cluster_with_security/README.MD new file mode 100644 index 000000000..f3d012a6b --- /dev/null +++ b/examples/gke/gke_cluster_with_security/README.MD @@ -0,0 +1,27 @@ +# Example of a GKE cluster connected to CAST AI with the Kvisor security agent enabled +The following example creates a GKE cluster and its supporting resources using GKE community modules. +After the GKE cluster is created, it is onboarded to CAST AI. +[Kvisor security agent](https://docs.cast.ai/docs/kvisor) is deployed to the cluster and security policies are enabled.\ +See the `install_security_agent` and `kvisor_values` variables in the `castai.tf` file.\ +Example configuration should be analysed in the following order: +1. Create VPC - `vpc.tf` +2. Create GKE cluster - `gke.tf` +3. Create CAST AI related resources to connect the GKE cluster to CAST AI with the Kvisor security agent enabled - `castai.tf` + +# Usage +1. Rename `tf.vars.example` to `tf.vars` +2. Update the `tf.vars` file with your project ID, cluster name, cluster region and CAST AI API token. +3. Initialize Terraform. From the example's root folder, run: +``` +terraform init +``` +4. Run Terraform apply: +``` +terraform apply -var-file=tf.vars +``` +5.
To destroy resources created by this example: +``` +terraform destroy -var-file=tf.vars +``` + +Please refer to this guide if you run into any issues https://docs.cast.ai/docs/terraform-troubleshooting diff --git a/examples/gke/gke_cluster_with_security/castai.tf b/examples/gke/gke_cluster_with_security/castai.tf new file mode 100644 index 000000000..9a17a7da9 --- /dev/null +++ b/examples/gke/gke_cluster_with_security/castai.tf @@ -0,0 +1,107 @@ +# 3. Connect GKE cluster to CAST AI with enabled Kvisor security agent. + +module "castai-gke-iam" { + source = "castai/gke-iam/castai" + + project_id = var.project_id + gke_cluster_name = var.cluster_name + service_accounts_unique_ids = var.service_accounts_unique_ids +} + + +# Configure GKE cluster connection to CAST AI with enabled Kvisor security agent. +module "castai-gke-cluster" { + source = "castai/gke-cluster/castai" + + wait_for_cluster_ready = true + kvisor_grpc_addr = var.kvisor_grpc_addr + + # Kvisor is an open-source security agent from CAST AI. + # install_security_agent by default installs Kvisor controller (k8s: deployment) + # https://docs.cast.ai/docs/kvisor + install_security_agent = true + + # Kvisor configuration examples, enable certain features: + kvisor_values = [ + yamlencode({ + controller = { + extraArgs = { + # UI: Vulnerability management configuration = API: IMAGE_SCANNING + "image-scan-enabled" = true + # UI: Compliance configuration = API: CONFIGURATION_SCANNING + "kube-bench-enabled" = true + "kube-linter-enabled" = true + } + } + + # UI: Runtime Security = API: RUNTIME_SECURITY + agent = { + # In order to enable Runtime security set agent.enabled to true. + # This will install Kvisor agent (k8s: daemonset) + # https://docs.cast.ai/docs/sec-runtime-security + "enabled" = true + + extraArgs = { + # Runtime security configuration examples: + # By default, most users enable the eBPF events and file hash enricher.
+ # For all flag explanations and code, see: https://github.com/castai/kvisor/blob/main/cmd/agent/daemon/daemon.go + "ebpf-events-enabled" = true + "file-hash-enricher-enabled" = true + # Other available flags, listed with the values this example sets: + "netflow-enabled" = false + "netflow-export-interval" = "30s" + "ebpf-program-metrics-enabled" = false + "prom-metrics-export-enabled" = false + "prom-metrics-export-interval" = "30s" + "process-tree-enabled" = false + } + } + }) + ] + + # Deprecated: leave this empty to prevent setting defaults. + kvisor_controller_extra_args = {} + + # Remaining module settings; the other optional CAST AI components are switched off in this example. + + install_workload_autoscaler = false + install_cloud_proxy = false + install_pod_mutator = false + delete_nodes_on_disconnect = false + + api_url = var.castai_api_url + castai_api_token = var.castai_api_token + grpc_url = var.castai_grpc_url + + project_id = var.project_id + gke_cluster_name = var.cluster_name + gke_cluster_location = var.cluster_region + + gke_credentials = module.castai-gke-iam.private_key + default_node_configuration = module.castai-gke-cluster.castai_node_configurations["default"] + + node_configurations = { + default = { + min_disk_size = 100 + disk_cpu_ratio = 0 + subnets = [module.vpc.subnets_ids[0]] + tags = {} + } + } + + node_templates = { + default_by_castai = { + name = "default-by-castai" + configuration_id = module.castai-gke-cluster.castai_node_configurations["default"] + is_default = true + is_enabled = true + should_taint = false + + constraints = { + on_demand = true + } + } + } + + depends_on = [google_container_cluster.my-k8s-cluster, module.castai-gke-iam] +} \ No newline at end of file diff --git a/examples/gke/gke_cluster_with_security/gke.tf b/examples/gke/gke_cluster_with_security/gke.tf new file mode 100644 index 000000000..1ab145fd3 --- /dev/null +++ b/examples/gke/gke_cluster_with_security/gke.tf @@ -0,0 +1,39 @@ +# 2. Create GKE cluster.
+ +data "google_client_config" "default" {} + +resource "google_container_cluster" "my-k8s-cluster" { + + initial_node_count = 3 + node_config { + machine_type = "n2-standard-2" # the default machine type does not have enough memory for the CAST AI agent + preemptible = false + } + + location = var.cluster_region + + project = var.project_id + + name = var.cluster_name + network = module.vpc.network_name + subnetwork = module.vpc.subnets_names[0] + + enable_autopilot = false + enable_kubernetes_alpha = false + enable_l4_ilb_subsetting = false + enable_legacy_abac = false + enable_tpu = false + + node_pool_defaults { + node_config_defaults { + logging_variant = "DEFAULT" + } + } + + networking_mode = "VPC_NATIVE" + ip_allocation_policy { + cluster_secondary_range_name = local.ip_range_pods # Must match the range_name in subnet + services_secondary_range_name = local.ip_range_services # Must match the range_name in subnet + stack_type = "IPV4" + } +} diff --git a/examples/gke/gke_cluster_with_security/providers.tf b/examples/gke/gke_cluster_with_security/providers.tf new file mode 100644 index 000000000..8dd3676a5 --- /dev/null +++ b/examples/gke/gke_cluster_with_security/providers.tf @@ -0,0 +1,13 @@ +# Configure Data sources and providers required for CAST AI connection.
+provider "castai" { + api_token = var.castai_api_token + api_url = var.castai_api_url +} + +provider "helm" { + kubernetes { + host = "https://${google_container_cluster.my-k8s-cluster.endpoint}" + token = data.google_client_config.default.access_token + cluster_ca_certificate = base64decode(google_container_cluster.my-k8s-cluster.master_auth[0].cluster_ca_certificate) + } +} \ No newline at end of file diff --git a/examples/gke/gke_cluster_with_security/tf.vars.example b/examples/gke/gke_cluster_with_security/tf.vars.example new file mode 100644 index 000000000..b60750589 --- /dev/null +++ b/examples/gke/gke_cluster_with_security/tf.vars.example @@ -0,0 +1,6 @@ +cluster_name = "" +cluster_region = "" +castai_api_url = "https://api.cast.ai" +castai_api_token = "" +project_id = "" +service_accounts_unique_ids = ["", ""] \ No newline at end of file diff --git a/examples/gke/gke_cluster_with_security/variables.tf b/examples/gke/gke_cluster_with_security/variables.tf new file mode 100644 index 000000000..a2188dd50 --- /dev/null +++ b/examples/gke/gke_cluster_with_security/variables.tf @@ -0,0 +1,43 @@ +# GKE module variables. +variable "cluster_name" { + type = string + description = "GKE cluster name in GCP project." +} + +variable "cluster_region" { + type = string + description = "The region to create the cluster." +} + +variable "project_id" { + type = string + description = "GCP project ID in which GKE cluster would be created." +} + +variable "castai_api_token" { + type = string + description = "CAST AI API token created in console.cast.ai API Access keys section."
+} + +variable "castai_api_url" { + type = string + description = "CAST AI api url" +} + +variable "castai_grpc_url" { + type = string + description = "CAST AI gRPC URL used by pod pinner" + default = "grpc.cast.ai:443" +} + +variable "kvisor_grpc_addr" { + type = string + description = "CAST AI Kvisor optimized GRPC API address" + default = "kvisor.prod-master.cast.ai:443" // If your cluster is in the EU region, set this to kvisor.prod-eu.cast.ai:443 (same host:port form as the default, without a URL scheme). +} + +variable "service_accounts_unique_ids" { + type = list(string) + description = "Service Accounts' unique IDs used by node pools in the cluster." + default = [] +} diff --git a/examples/gke/gke_cluster_with_security/version.tf b/examples/gke/gke_cluster_with_security/version.tf new file mode 100644 index 000000000..502f0c51f --- /dev/null +++ b/examples/gke/gke_cluster_with_security/version.tf @@ -0,0 +1,17 @@ +terraform { + required_providers { + castai = { + source = "castai/castai" + } + kubernetes = { + source = "hashicorp/kubernetes" + } + google = { + source = "hashicorp/google" + } + google-beta = { + source = "hashicorp/google-beta" + } + } + required_version = ">= 0.13" +} diff --git a/examples/gke/gke_cluster_with_security/vpc.tf b/examples/gke/gke_cluster_with_security/vpc.tf new file mode 100644 index 000000000..e1f44996c --- /dev/null +++ b/examples/gke/gke_cluster_with_security/vpc.tf @@ -0,0 +1,35 @@ +# 1. Create VPC.
+ +locals { + ip_range_pods = "${var.cluster_name}-ip-range-pods" # secondary range name for pods; gke.tf references it in ip_allocation_policy + ip_range_services = "${var.cluster_name}-ip-range-services" # secondary range name for services; gke.tf references it in ip_allocation_policy + ip_range_nodes = "${var.cluster_name}-ip-range-nodes" # used below as the subnet name +} + +module "vpc" { + source = "terraform-google-modules/network/google" + version = "6.0.0" + project_id = var.project_id + network_name = var.cluster_name + subnets = [ + { + subnet_name = local.ip_range_nodes + subnet_ip = "10.0.0.0/16" + subnet_region = var.cluster_region + subnet_private_access = "true" + }, + ] + + secondary_ranges = { + (local.ip_range_nodes) = [ # key is the subnet_name declared in subnets above + { + range_name = local.ip_range_pods + ip_cidr_range = "10.20.0.0/16" + }, + { + range_name = local.ip_range_services + ip_cidr_range = "10.30.0.0/24" + } + ] + } +}