Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions deployments/stacks/dpe-k8s-deployments/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ locals {
git_revision = var.git_revision
}
module "sage-aws-eks-autoscaler" {
source = "spacelift.io/sagebionetworks/sage-aws-eks-autoscaler/aws"
version = "0.9.0"
source = "../../../modules/sage-aws-k8s-node-autoscaler"
cluster_name = var.cluster_name
private_vpc_subnet_ids = var.private_subnet_ids_eks_worker_nodes
vpc_id = var.vpc_id
Expand Down
5 changes: 3 additions & 2 deletions modules/sage-aws-k8s-node-autoscaler/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ resource "helm_release" "ocean-kubernetes-controller" {
repository = "https://charts.spot.io"
chart = "ocean-kubernetes-controller"
namespace = "spot-system"
version = "0.1.52"
version = "0.1.66"
create_namespace = true

values = [templatefile("${path.module}/templates/values.yaml", {})]
Expand All @@ -106,7 +106,7 @@ resource "helm_release" "ocean-kubernetes-controller" {

module "ocean-aws-k8s" {
source = "spotinst/ocean-aws-k8s/spotinst"
version = "1.4.0"
version = "1.11.0"

# Configuration
cluster_name = var.cluster_name
Expand Down Expand Up @@ -148,3 +148,4 @@ module "ocean-aws-k8s" {
virtualization_types = null
}
}

59 changes: 46 additions & 13 deletions modules/sage-aws-k8s-node-autoscaler/templates/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ spotinst:
# -- Spot Token. (Required)
# Ref: https://docs.spot.io/administration/api/create-api-token
token: ""
# -- Spot Account. (Required)
# Ref: https://docs.spot.io/administration/organizations?id=account
# -- Spot Account ID. (Required)
# Example: `act-123abcd`
account: ""
# -- Unique identifier used by the Ocean Controller to connect (Required)
# between the Ocean backend and the Kubernetes cluster.
Expand All @@ -25,15 +25,25 @@ spotinst:
enableCsrApproval: true
# -- Disable automatic RightSizing. (Optional)
disableAutomaticRightSizing: false
# -- Disable TLS certificate validation. (Optional)
insecureSkipTLSVerify: false
# -- Sets the controller to read-only mode, removing write permissions and disabling autoscaling. (Optional)
readonly: false

# -- Configure the amount of replicas for the controller (Optional)
replicas: 2

image:
repository: us-docker.pkg.dev/spotit-today/container-labs/spotinst-kubernetes-controller
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
# -- Overrides the image tag whose default is the chart appVersion.
tag: ""
# -- Set to `true` to use an FIPS-140 compliant image. This flag adds `-fips` suffix to the image tag,
# therefore it should not be used together with the `--image.tag` flag.
# Ref: https://go.dev/doc/security/fips140
fips: false

initContainers: []

imagePullSecrets: []

Expand Down Expand Up @@ -79,9 +89,9 @@ commonLabels: {}
# Ref: https://kubernetes.io/docs/concepts/security/pod-security-standards/
podSecurityContext:
runAsNonRoot: true
runAsUser: 10001
runAsGroup: 10001
fsGroup: 10001
runAsUser: 1000690000
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on my understanding, 10001 and 1000690000 are both non-root users. Why do we need to switch from one non-root user to another?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I understand why now! For some reason, that was changed in their template:
Screenshot 2025-09-04 at 3 11 01 PM

runAsGroup: 1000690000
fsGroup: 1000690000

# -- Priority class name for the controller pod.
priorityClassName: system-node-critical
Expand All @@ -100,6 +110,8 @@ securityContext:
drop:
- ALL

command: []
Copy link
Contributor

@linglp linglp Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need to set `command: []`? Does it mean the default entry point is used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not exactly sure. Many of these changes are what the developers have as defaults, so I decided to leave them in.

In helm when you run

helm show values spot/ocean-kubernetes-controller

It prints their default values file for the chart version you are using. The changes you see in our values file are their new suggested defaults; I also kept the parts of our own configuration that differ from the defaults.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah got it. I saw it on the UI too:
Screenshot 2025-09-04 at 3 12 05 PM

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we usually just copy the template that they provide? I can find the template here: https://artifacthub.io/packages/helm/spot/ocean-kubernetes-controller

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah exactly! I do it through helm commands, but there are a bunch of places that we can get their template.

Copying the default template and adjusting it is how I have done all of the Helm-based deployments in the repo.


args: []
# - --test

Expand Down Expand Up @@ -190,8 +202,7 @@ metrics-server:
pullPolicy: IfNotPresent

# -- Arguments to pass to metrics-server on start up. (Optional)
args:
- --logtostderr
# args:
# enable this if you have self-signed certificates, see: https://github.com/kubernetes-incubator/metrics-server
# - --kubelet-insecure-tls

Expand All @@ -200,10 +211,12 @@ logShipping:
# -- Specifies whether to send the controller logs to Spot for analysis. (Optional)
enabled: false

# -- Specifies the log shipping container image. (Optional)
image:
# -- Image repository. (Optional)
repository: ghcr.io/fluent/fluent-bit
tag: "3.0.7"
# -- Overrides the image tag. (Optional)
tag: "3.1.9"
# -- Image pull policy. (Optional)
pullPolicy: IfNotPresent

# -- Log shipping destination configuration.
Expand All @@ -212,6 +225,26 @@ logShipping:
port: 443
tls: true

extraVolumeMounts: []

extraEnv: []

# -- Log shipping container command. (Optional)
command:
- /fluent-bit/bin/fluent-bit
- -c
- /tmp/fluent-bit.conf
- -q

# -- Log Shipping container security context
securityContext:
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
runAsNonRoot: true
capabilities:
drop:
- ALL

# Auto Update process configuration.
autoUpdate:
# -- Configures the image for the auto-updater job. (Optional)
Expand All @@ -230,9 +263,9 @@ autoUpdate:
# Ref: https://kubernetes.io/docs/concepts/security/pod-security-standards/
podSecurityContext:
runAsNonRoot: true
runAsUser: 10001
runAsGroup: 10001
fsGroup: 10001
runAsUser: 1000690000
runAsGroup: 1000690000
fsGroup: 1000690000

# -- Security Context for the auto-updater container. (Optional)
securityContext:
Expand Down
4 changes: 2 additions & 2 deletions modules/sage-aws-k8s-node-autoscaler/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ terraform {
version = "~> 2.0"
}
spotinst = {
source = "spotinst/spotinst"
version = "1.172.0" # Specify the version you wish to use
source = "opentofu/spotinst"
version = "1.225.0" # Specify the version you wish to use
}
helm = {
source = "hashicorp/helm"
Expand Down