Skip to content

creating kubeconfig file failed when creating private cluster #410

@caili-zhang-sas

Description

@caili-zhang-sas

Terraform Version Details

Run ./files/tools/iac_tooling_version.sh to show the version

{
  "terraform_version": "\"1.10.5\"",
  "terraform_revision": "null",
  "terraform_outdated": "true",
  "provider_selections": "{\"registry.terraform.io/hashicorp/aws\":\"5.100.0\",\"registry.terraform.io/hashicorp/cloudinit\":\"2.3.7\",\"registry.terraform.io/hashicorp/external\":\"2.3.5\",\"registry.terraform.io/hashicorp/kubernetes\":\"2.38.0\",\"registry.terraform.io/hashicorp/local\":\"2.5.3\",\"registry.terraform.io/hashicorp/null\":\"3.2.4\",\"registry.terraform.io/hashicorp/random\":\"3.7.2\",\"registry.terraform.io/hashicorp/time\":\"0.13.1\",\"registry.terraform.io/hashicorp/tls\":\"4.1.0\"}"
}

Terraform Variable File Details

# !NOTE! - These are only a subset of the variables in CONFIG-VARS.md provided
# as examples. Customize this file to add any variables from CONFIG-VARS.md whose
# default values you want to change.

# ****************  REQUIRED VARIABLES  ****************
# These required variables' values MUST be provided by the User
prefix   = "xxx-viya"
location = "ap-northeast-1" # e.g., "us-east-1"
# ****************  REQUIRED VARIABLES  ****************

# !NOTE! - Without specifying your CIDR block access rules, ingress traffic
#          to your cluster will be blocked by default.

# **************  RECOMMENDED  VARIABLES  ***************
cluster_api_mode             = "private"
default_private_access_cidrs = ["xx.xx.xx.xx/32"]
ssh_public_key              = "~/.ssh/id_rsa.pub"
# **************  RECOMMENDED  VARIABLES  ***************

# Tags for all tagable items in your cluster.
tags = {"resourceowner"="caili.zhang@sas.com"} # e.g., { "key1" = "value1", "key2" = "value2" }

# Postgres config - By having this entry a database server is created. If you do not
#                   need an external database server remove the 'postgres_servers'
#                   block below.
postgres_servers = {
  default = {
    "server_version" = "16"
    "instance_type"  = "db.m6idn.xlarge"
    "storage_size"   = 128
    "administrator_login" = "pgadmin"
    "administrator_password" = "Password123!"
  }
}

## Cluster config
kubernetes_version           = "1.31"
default_nodepool_node_count  = 1
default_nodepool_vm_type     = "r6in.2xlarge"
default_nodepool_custom_data = ""

## General
storage_type         = "standard"

## Cluster Node Pools config - minimal
node_pools = {
  cas = {
    "vm_type"      = "r6idn.2xlarge"
    "cpu_type"     = "AL2023_x86_64_STANDARD"
    "os_disk_type" = "gp3"
    "os_disk_size" = 200
    "os_disk_iops" = 0
    "min_nodes"    = 1
    "max_nodes"    = 1
    "node_taints"  = ["workload.sas.com/class=cas:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "cas"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  compute = {
    "vm_type"      = "i4i.4xlarge"
    "cpu_type"     = "AL2023_x86_64_STANDARD"
    "os_disk_type" = "gp3"
    "os_disk_size" = 200
    "os_disk_iops" = 0
    "min_nodes"    = 1
    "max_nodes"    = 1
    "node_taints"  = ["workload.sas.com/class=compute:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class"        = "compute"
      "launcher.sas.com/prepullImage" = "sas-programming-environment"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  stateless = {
    "vm_type"      = "r6in.4xlarge"
    "cpu_type"     = "AL2023_x86_64_STANDARD"
    "os_disk_type" = "gp3"
    "os_disk_size" = 200
    "os_disk_iops" = 0
    "min_nodes"    = 1
    "max_nodes"    = 1
    "node_taints"  = ["workload.sas.com/class=stateless:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "stateless"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  },
  stateful = {
    "vm_type"      = "r6in.2xlarge"
    "cpu_type"     = "AL2023_x86_64_STANDARD"
    "os_disk_type" = "gp3"
    "os_disk_size" = 200
    "os_disk_iops" = 0
    "min_nodes"    = 1
    "max_nodes"    = 1
    "node_taints"  = ["workload.sas.com/class=stateful:NoSchedule"]
    "node_labels" = {
      "workload.sas.com/class" = "stateful"
    }
    "custom_data"                          = ""
    "metadata_http_endpoint"               = "enabled"
    "metadata_http_tokens"                 = "required"
    "metadata_http_put_response_hop_limit" = 1
  }
}
# Jump Server
create_jump_vm = false

# NFS Server
# required ONLY when storage_type is "standard" to create NFS Server VM
create_nfs_public_ip = false
nfs_vm_admin         = "nfsuser"
nfs_vm_type          = "m6in.xlarge"
nfs_raid_disk_type   = "gp3"

Steps to Reproduce

My setup: the jump server is created manually, and the Viya cluster is generated by the IaC tool without a jump server.

  • Jump Server (manually created)
  • VPC 1 ( default VPC)
  • VPC 2 ( created by IaC tool)
  • cluster ( created by IaC tool)
  • RDS ( created by IaC tool)
  • NFS server ( created by IaC tool)

Steps:

  1. create a jump server in VPC 1
  2. run IaC tool to create VPC 2, cluster, RDS, NFS server

Expected Behavior

terraform apply without error

cd /sas/viya4-iac-aws
TFVARS=/sas/viya4-iac-aws/terraform.tfvars
TFPLAN=/sas/viya4-iac-aws/terraform.plan
terraform plan \
    -input=false \
    -var-file=${TFVARS} \
    -out ${TFPLAN}

TFPLAN=/sas/viya4-iac-aws/terraform.plan
TFSTATE=/sas/viya4-iac-aws/terraform.tfstate
TFLOG=/sas/viya4-iac-aws/terraform-apply.log
terraform apply \
    -state ${TFSTATE} \
    ${TFPLAN} \
    2>&1 | tee -a ${TFLOG}

Actual Behavior

│ Error: Post "https://B4EF8CA80F57675D118AE1F43B888DD6.gr7.ap-northeast-1.eks.amazonaws.com/api/v1/namespaces/kube-system/serviceaccounts": context deadline exceeded
│
│   with module.kubeconfig.kubernetes_service_account.kubernetes_sa[0],
│   on modules/kubeconfig/main.tf line 66, in resource "kubernetes_service_account" "kubernetes_sa":
│   66: resource "kubernetes_service_account" "kubernetes_sa" {

I found that the URL is not reachable from the jump server.

curl -kvv https://B4EF8CA80F57675D118AE1F43B888DD6.gr7.ap-northeast-1.eks.amazonaws.com
no response

Additional Context

I found a workaround for this issue.
However, it still needs manual work. Can the IaC tool add VPC peering from VPC 1 to VPC 2 for this use case?

  1. create VPC peering between VPC 1 and VPC 2

  2. modify the default route table in VPC 1.
    route rule dst=VPC2, target=VPC peering

  3. modify the default route table in VPC 2
    route rule dst=VPC1, target=VPC peering

  4. re-run the part of the terraform apply that failed.

terraform apply     \
  -state ${TFSTATE} -target=module.kubeconfig \
  2>&1 | tee -a ${TFLOG}

References

No response

Code of Conduct

  • I agree to follow this project's Code of Conduct

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug (Something isn't working), new (Added to an issue when it's new ;)), stale (Open for 30 days with no activity)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions