[AN-449] Upgrade Galaxy chart version & install galaxy-deps chart for Terra clusters #4835

Draft: wants to merge 2 commits into develop

8 changes: 6 additions & 2 deletions Dockerfile
@@ -27,8 +27,10 @@ ENV HELM_DEBUG 1
ENV TERRA_APP_SETUP_VERSION 0.1.0
ENV TERRA_APP_VERSION 0.5.0
# This is galaxykubeman, which references Galaxy
ENV GALAXY_VERSION 2.10.0
ENV GALAXY_VERSION 3.0.0
ENV NGINX_VERSION 4.3.0
# This is galaxy-deps which is installed for Terra clusters
ENV GALAXY_DEPS_VERSION 1.0.0
# If you update this here, make sure to also update reference.conf:
ENV CROMWELL_CHART_VERSION 0.2.523
ENV HAIL_BATCH_CHART_VERSION 0.2.0
@@ -45,13 +47,14 @@ RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master
./get_helm.sh --version v3.15.3 && \
rm get_helm.sh

# Add the repos containing nginx, galaxy, setup apps, custom apps, cromwell and aou charts
# Add the repos containing nginx, galaxy, galaxy-deps, setup apps, custom apps, cromwell and aou charts
RUN helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx && \
helm repo add galaxy https://raw.githubusercontent.com/cloudve/helm-charts/anvil/ && \
helm repo add terra-app-setup-charts https://storage.googleapis.com/terra-app-setup-chart && \
helm repo add terra https://terra-app-charts.storage.googleapis.com && \
helm repo add cromwell-helm https://broadinstitute.github.io/cromwhelm/charts/ && \
helm repo add terra-helm https://terra-helm.storage.googleapis.com && \
helm repo add cloudve https://raw.githubusercontent.com/CloudVE/helm-charts/master/ && \
helm repo update

# .Files helm helper can't access files outside a chart. Hence in order to populate cert file properly, we're
@@ -68,6 +71,7 @@ RUN cd /leonardo && \
helm pull terra-helm/rstudio --version $RSTUDIO_CHART_VERSION --untar && \
helm pull terra-helm/sas --version $SAS_CHART_VERSION --untar && \
helm pull oci://terradevacrpublic.azurecr.io/hail/hail-batch-terra-azure --version $HAIL_BATCH_CHART_VERSION --untar && \
helm pull cloudve/galaxy-deps --version $GALAXY_DEPS_VERSION --untar && \
cd /

# Install https://github.com/apangin/jattach to get access to JDK tools
13 changes: 11 additions & 2 deletions http/src/main/resources/reference.conf
@@ -767,6 +767,14 @@ gke {
"controller.admissionWebhooks.enabled=false"
]
}
galaxyDeps {
namespace = "galaxy-deps"
release = "galaxy-deps"
chartName = "/leonardo/galaxy-deps"
# If you change this here, be sure to update it in the dockerfile
chartVersion = "1.0.0"
values = []
}
galaxyApp {
# See comment in KubernetesServiceInterp for more context. Theoretically release names should
# only need to be unique within a namespace, but something in the Galaxy chart requires them
@@ -775,7 +783,7 @@
chartName = "/leonardo/galaxykubeman"
# If you change this here, be sure to update it in the dockerfile
# This is galaxykubeman, which references Galaxy
chartVersion = "2.10.0"
chartVersion = "3.0.0"
namespaceNameSuffix = "gxy-ns"
serviceAccountName = "gxy-ksa"
# Setting uninstallKeepHistory will cause the `helm uninstall` command to keep a record of
@@ -817,7 +825,8 @@ gke {
"2.8.0",
"2.8.1",
"2.9.0",
"2.10.0"
"2.10.0",
"3.0.0" # TODO - Is it right to exclude it here?
]
}
galaxyDisk {
@@ -643,6 +643,16 @@ object Config {
)
}

implicit private val galaxyDepsConfigReader: ValueReader[KubernetesGalaxyDepsConfig] = ValueReader.relative { config =>
KubernetesGalaxyDepsConfig(
config.as[NamespaceName]("namespace"),
config.as[Release]("release"),
config.as[ChartName]("chartName"),
config.as[ChartVersion]("chartVersion"),
config.as[List[ValueConfig]]("values")
)
}

implicit private val namespaceNameSuffixReader: ValueReader[NamespaceNameSuffix] =
stringValueReader.map(NamespaceNameSuffix)
implicit private val releaseNameSuffixReader: ValueReader[ReleaseNameSuffix] =
@@ -773,6 +783,7 @@
val gkeAllowedAppConfig = config.as[AllowedAppConfig]("gke.allowedApp")
val gkeNodepoolConfig = NodepoolConfig(gkeDefaultNodepoolConfig, gkeGalaxyNodepoolConfig)
val gkeGalaxyDiskConfig = config.as[GalaxyDiskConfig]("gke.galaxyDisk")
val gkeGalaxyDepsConfig = config.as[KubernetesGalaxyDepsConfig]("gke.galaxyDeps")

implicit private val leoPubsubMessageSubscriberConfigReader: ValueReader[LeoPubsubMessageSubscriberConfig] =
ValueReader.relative { config =>
@@ -910,6 +921,7 @@
appMonitorConfig,
gkeClusterConfig,
proxyConfig,
gkeGalaxyDiskConfig
gkeGalaxyDiskConfig,
gkeGalaxyDepsConfig
)
}
@@ -0,0 +1,13 @@
package org.broadinstitute.dsde.workbench.leonardo.config

import org.broadinstitute.dsde.workbench.google2.KubernetesSerializableName.NamespaceName
import org.broadinstitute.dsde.workbench.leonardo.Chart
import org.broadinstitute.dsp.{ChartName, ChartVersion, Release}

final case class KubernetesGalaxyDepsConfig(namespace: NamespaceName,
release: Release,
chartName: ChartName,
chartVersion: ChartVersion,
values: List[ValueConfig]) {
def chart: Chart = Chart(chartName, chartVersion)
}
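// For reference (not part of the diff): with the gke.galaxyDeps block above, chart resolves to
// Chart(ChartName("/leonardo/galaxy-deps"), ChartVersion("1.0.0")), which is what the install step in
// GKEInterpreter logs before invoking helm.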
@@ -116,6 +116,7 @@ class AppRoutes(kubernetesService: AppService[IO], userInfoDirectives: UserInfoD
}
}

// Saloni - called for above POST /app route
private[api] def createAppHandler(userInfo: UserInfo,
googleProject: GoogleProject,
appName: AppName,
@@ -93,6 +93,7 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig,
)
_ <- F.raiseWhen(!hasPermission)(ForbiddenError(userEmail))

// Saloni - note: AOU_UI_LABEL is the "all-of-us" label; the flag below is true when that label's value is "true"
enableIntraNodeVisibility = req.labels.get(AOU_UI_LABEL).exists(x => x == "true")
_ <- req.appType match {
case AppType.Galaxy | AppType.HailBatch | AppType.Wds | AppType.Cromwell | AppType.WorkflowsApp |
@@ -277,15 +278,18 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig,
)
app <- appQuery.save(saveApp, Some(ctx.traceId)).transaction

// Saloni - this is where the action is determined for cluster creation
clusterNodepoolAction = saveClusterResult match {
case ClusterExists(_, _) =>
// If we're using a pre-existing nodepool then don't specify CreateNodepool in the pubsub message
if (req.autopilot.isDefined || userNodepoolOpt.isDefined) None
else Some(ClusterNodepoolAction.CreateNodepool(nodepool.id))
case ClusterDoesNotExist(c, n) =>
if (req.autopilot.isDefined) Some(ClusterNodepoolAction.CreateCluster(c.id))
else
else {
// Saloni - this is where new cluster is created
Some(ClusterNodepoolAction.CreateClusterAndNodepool(c.id, n.id, nodepool.id))
}
}
createAppMessage = CreateAppMessage(
googleProject,
Expand Down Expand Up @@ -1051,6 +1055,9 @@ final class LeoAppServiceInterp[F[_]: Parallel](config: AppServiceConfig,
}
} yield ()

// Saloni - what does this do?
// maybe it fetches a cluster that was already created, before installing new apps
// called in createApp and createAppV2
private[service] def getSavableCluster(
userEmail: WorkbenchEmail,
cloudContext: CloudContext,
@@ -100,6 +100,7 @@ class LeoPubsubMessageSubscriber[F[_]](
handleDeleteDiskMessage(msg)
case msg: UpdateDiskMessage =>
handleUpdateDiskMessage(msg)
// Saloni - here
case msg: CreateAppMessage =>
handleCreateAppMessage(msg)
case msg: DeleteAppMessage =>
@@ -961,6 +962,7 @@
)
} yield ()

// Saloni - CreateAppMessage object contains enableIntraNodeVisibility
private[monitor] def handleCreateAppMessage(msg: CreateAppMessage)(implicit
ev: Ask[F, AppContext]
): F[Unit] =
Expand Down Expand Up @@ -991,6 +993,7 @@ class LeoPubsubMessageSubscriber[F[_]](
} yield d
}

// Saloni - this is where the cluster might be created
// The "create app" flow potentially does a number of things:
// 1. creates a Kubernetes cluster if it doesn't exist
// 2. creates a nodepool within the cluster if it doesn't exist
@@ -1003,6 +1006,8 @@
// handler.
createClusterOrNodepoolOp = msg.clusterNodepoolAction match {
case Some(ClusterNodepoolAction.CreateClusterAndNodepool(clusterId, defaultNodepoolId, nodepoolId)) =>
// Saloni - the first case is the one we care about
// enableIntraNodeVisibility passed in 'msg'
createClusterAndNodepools(msg, clusterId, autopilotEnabled = false, List(defaultNodepoolId, nodepoolId))
case Some(ClusterNodepoolAction.CreateCluster(clusterId)) =>
createClusterAndNodepools(msg, clusterId, autopilotEnabled = true, List.empty)
@@ -1100,6 +1105,8 @@
)
} yield ()

// Saloni - this creates the cluster and polls until creation completes (or errors)
// CreateAppMessage has enableIntraNodeVisibility
private def createClusterAndNodepools(msg: CreateAppMessage,
clusterId: KubernetesClusterLeoId,
autopilotEnabled: Boolean,
@@ -1133,7 +1140,7 @@
// monitor cluster creation asynchronously
monitorOp <- createClusterResultOpt.traverse_(createClusterResult =>
getGkeAlgFromRegistry()
.pollCluster(PollClusterParams(clusterId, msg.project, createClusterResult))
.pollCluster(PollClusterParams(clusterId, msg.project, createClusterResult), msg.enableIntraNodeVisibility) // polling here
.adaptError { case e =>
PubsubKubernetesError(
AppError(e.getMessage, ctx.now, ErrorAction.CreateApp, ErrorSource.Cluster, None, Some(ctx.traceId)),
@@ -30,7 +30,7 @@ trait GKEAlgebra[F[_]] {
* Polls a creating GKE cluster for its completion and also does other cluster-wide set-up like
* install nginx ingress controller.
*/
def pollCluster(params: PollClusterParams)(implicit ev: Ask[F, AppContext]): F[Unit]
def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[F, AppContext]): F[Unit]
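// Alternative worth weighing (a sketch, not what this diff does): carry the flag inside PollClusterParams with a
// default value, so GKEAlgebra implementations and mocks would not all need a new parameter. The existing field
// names below are assumptions based on how the params are constructed at the call sites:
final case class PollClusterParams(clusterId: KubernetesClusterLeoId,
                                   googleProject: GoogleProject,
                                   createResult: CreateClusterResult,
                                   enableIntraNodeVisibility: Boolean = false)
// pollCluster(params: PollClusterParams)(implicit ev: Ask[F, AppContext]): F[Unit] would then keep its original
// shape, and LeoPubsubMessageSubscriber would pass msg.enableIntraNodeVisibility inside the params instead.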

/**
* Creates a GKE nodepool and polls it for completion.
@@ -92,6 +92,7 @@ class GKEInterpreter[F[_]](
(ps: List[KubernetesPodStatus]) => ps.forall(isPodDone)
implicit private def listDoneCheckable[A: DoneCheckable]: DoneCheckable[List[A]] = as => as.forall(_.isDone)

// Saloni - CreateClusterParams object has enableIntraNodeVisibility
override def createCluster(params: CreateClusterParams)(implicit
ev: Ask[F, AppContext]
): F[Option[CreateClusterResult]] = {
@@ -220,7 +221,8 @@
)
}

override def pollCluster(params: PollClusterParams)(implicit ev: Ask[F, AppContext]): F[Unit] =
// Saloni - polls for cluster creation status
override def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[F, AppContext]): F[Unit] =
for {
ctx <- ev.ask

@@ -238,7 +240,7 @@
)

_ <- F.fromOption(dbCluster.nodepools.find(_.isDefault), DefaultNodepoolNotFoundException(dbCluster.id))
// Poll GKE until completion
// Poll GKE until completion: this polls the cluster create operation (params.createResult.op) until it finishes
lastOp <- gkeService
.pollOperation(
params.createResult.op,
@@ -275,6 +277,7 @@
)
)

// Saloni - if execution reaches here, cluster creation must have succeeded
_ <- logger.info(ctx.loggingCtx)(
s"Successfully created cluster ${dbCluster.getClusterId.toString}!"
)
@@ -301,6 +304,11 @@
)
)
.transaction

// Saloni - install the galaxy-deps helm chart here, for Terra clusters only
_ <- if (!enableIntraNodeVisibility) installGalaxyDepsForTerra(dbCluster, googleCluster) else F.unit

_ <- kubernetesClusterQuery.updateStatus(dbCluster.id, KubernetesClusterStatus.Running).transaction
_ <- nodepoolQuery.updateStatuses(dbCluster.nodepools.map(_.id), NodepoolStatus.Running).transaction
} yield ()
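// To make the new gating concrete: enableIntraNodeVisibility is derived from the "all-of-us" label at app-create
// time (see LeoAppServiceInterp above), so galaxy-deps is skipped for AoU clusters and installed for Terra ones.
// A tiny illustration (label key per the earlier note; the labels map shape is assumed):
//   Map("all-of-us" -> "true").get("all-of-us").exists(_ == "true")   // true  -> AoU   -> skip galaxy-deps
//   Map.empty[String, String].get("all-of-us").exists(_ == "true")    // false -> Terra -> installGalaxyDepsForTerra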
@@ -1302,6 +1310,7 @@
}
} yield res

// Saloni - this is installing a helm chart
private[util] def installNginx(dbCluster: KubernetesCluster, googleCluster: Cluster)(implicit
ev: Ask[F, AppContext]
): F[IP] =
@@ -1348,6 +1357,34 @@
)
} yield loadBalancerIp

// Saloni TODO - since this is being installed does it also need to be uninstalled when deleting resources?
private[util] def installGalaxyDepsForTerra(dbCluster: KubernetesCluster, googleCluster: Cluster)(implicit
ev: Ask[F, AppContext]
): F[Unit] =
for {
ctx <- ev.ask

_ <- logger.info(ctx.loggingCtx)(
s"Installing galaxy-deps helm chart ${config.galaxyDepsConfig.chart} in cluster ${dbCluster.getClusterId.toString}"
)

helmAuthContext <- getHelmAuthContext(googleCluster, dbCluster, config.galaxyDepsConfig.namespace)

// Invoke helm
_ <- helmClient
.installChart(
config.galaxyDepsConfig.release,
config.galaxyDepsConfig.chartName,
config.galaxyDepsConfig.chartVersion,
org.broadinstitute.dsp.Values(config.galaxyDepsConfig.values.map(_.value).mkString(",")),
createNamespace = true
)
.run(helmAuthContext)

// Saloni - TODO do we need to poll for anything?
} yield ()
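// On the uninstall TODO above: if galaxy-deps should be torn down when the cluster is deleted, a counterpart could
// mirror the install. Only a sketch, assuming the same helmClient.uninstall(release, keepHistory) call used for the
// other charts in this interpreter:
private[util] def uninstallGalaxyDepsForTerra(dbCluster: KubernetesCluster, googleCluster: Cluster)(implicit
  ev: Ask[F, AppContext]
): F[Unit] =
  for {
    ctx <- ev.ask
    _ <- logger.info(ctx.loggingCtx)(
      s"Uninstalling galaxy-deps helm chart ${config.galaxyDepsConfig.chart} from cluster ${dbCluster.getClusterId.toString}"
    )
    helmAuthContext <- getHelmAuthContext(googleCluster, dbCluster, config.galaxyDepsConfig.namespace)
    _ <- helmClient
      .uninstall(config.galaxyDepsConfig.release, false) // false: do not keep release history
      .run(helmAuthContext)
  } yield ()
// On the polling TODO: unlike nginx there is no load balancer IP to wait for, so an explicit readiness poll may not
// be needed; if one is, the pod-status helpers used elsewhere in this class could watch the galaxy-deps namespace.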

// Saloni - this is where Galaxy is installed; called from createAndPollApp
private[util] def installGalaxy(helmAuthContext: AuthContext,
appName: AppName,
release: Release,
@@ -2088,7 +2125,8 @@ final case class GKEInterpreterConfig(leoUrlBase: URL,
monitorConfig: AppMonitorConfig,
clusterConfig: KubernetesClusterConfig,
proxyConfig: ProxyConfig,
galaxyDiskConfig: GalaxyDiskConfig
galaxyDiskConfig: GalaxyDiskConfig,
galaxyDepsConfig: KubernetesGalaxyDepsConfig
)

final case class TerraAppSetupChartConfig(
@@ -48,7 +48,7 @@ object KubernetesTestData {
val galaxyApp = AppType.Galaxy

val galaxyChartName = ChartName("/leonardo/galaxykubeman")
val galaxyChartVersion = ChartVersion("2.10.0")
val galaxyChartVersion = ChartVersion("3.0.0")
val galaxyChart = Chart(galaxyChartName, galaxyChartVersion)

val galaxyReleasePrefix = "gxy-release"
@@ -142,7 +142,8 @@ final class ConfigSpec extends AnyFlatSpec with Matchers {
ChartVersion("2.8.0"),
ChartVersion("2.8.1"),
ChartVersion("2.9.0"),
ChartVersion("2.10.0")
ChartVersion("2.10.0"),
ChartVersion("3.0.0"),
)
)
Config.gkeGalaxyAppConfig shouldBe expectedResult
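// The new gke.galaxyDeps block is not asserted in this spec yet. A sketch of a companion check, assuming Config
// keeps exposing gkeGalaxyDepsConfig as a public val and that NamespaceName/Release/ChartName are in scope here:
it should "read gke.galaxyDeps properly" in {
  val expectedGalaxyDepsConfig = KubernetesGalaxyDepsConfig(
    NamespaceName("galaxy-deps"),
    Release("galaxy-deps"),
    ChartName("/leonardo/galaxy-deps"),
    ChartVersion("1.0.0"),
    List.empty
  )
  Config.gkeGalaxyDepsConfig shouldBe expectedGalaxyDepsConfig
}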
@@ -244,7 +244,7 @@ class MockGKEService extends GKEAlgebra[IO] {
* Polls a creating GKE cluster for its completion and also does other cluster-wide set-up like
* install nginx ingress controller.
*/
override def pollCluster(params: PollClusterParams)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit
override def pollCluster(params: PollClusterParams, enableIntraNodeVisibility: Boolean)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit

/** Creates a GKE nodepool but doesn't wait for its completion. */
override def createAndPollNodepool(params: CreateNodepoolParams)(implicit ev: Ask[IO, AppContext]): IO[Unit] = IO.unit
@@ -297,7 +297,8 @@ class GKEInterpreterSpec extends AnyFlatSpecLike with TestComponent with Mockito
PollClusterParams(savedCluster1.id,
savedCluster1.cloudContext.asInstanceOf[CloudContext.Gcp].value,
createResult.get
)
),
false
)
.attempt
} yield r shouldBe (Left(