diff --git a/packages/k8s/src/hooks/prepare-job.ts b/packages/k8s/src/hooks/prepare-job.ts index f518592f..ea430ccf 100644 --- a/packages/k8s/src/hooks/prepare-job.ts +++ b/packages/k8s/src/hooks/prepare-job.ts @@ -5,6 +5,7 @@ import { JobContainerInfo, ContextPorts, PrepareJobArgs, + ServiceContainerInfo, writeToResponseFile } from 'hooklib' import path from 'path' @@ -69,20 +70,11 @@ export async function prepareJob( ) } - let services: k8s.V1Container[] = [] - if (args.services?.length) { - generateServicesName(args.services) - services = args.services.map(service => { - core.debug(`Adding service '${service.image}' to pod definition`) - return createContainerSpec( - service, - generateContainerName(service.image), - false, - extension, - service.createOptions - ) - }) - } + const services: k8s.V1Container[] = processServiceContainers( + args.services, + container, + extension + ) if (!container && !services?.length) { throw new Error('No containers exist, skipping hook invocation') @@ -152,6 +144,58 @@ export async function prepareJob( generateResponseFile(responseFile, args, createdPod, isAlpine) } +export function processServiceContainers( + services?: ServiceContainerInfo[], + container?: k8s.V1Container, + extension?: k8s.V1PodTemplateSpec +): k8s.V1Container[] { + if (!services?.length) { + return [] + } + generateServicesName(services) + const serviceContainers = services.map(service => { + core.debug(`Adding service '${service.image}' to pod definition`) + return createContainerSpec( + service, + service.name, + false, + extension, + service.createOptions + ) + }) + + const tpuRequestingContainers = services.filter( + service => + service.resources?.limits && service.resources.limits['google.com/tpu'] + ) + + if (tpuRequestingContainers.length > 1) { + throw new Error( + `${tpuRequestingContainers.length} containers request for TPU's. Only 1 container per pod can request for TPU's.` + ) + } + + if (tpuRequestingContainers.length === 1) { + if ( + container?.resources?.requests && + container.resources.requests['google.com/tpu'] + ) { + core.debug( + 'removing tpu from main container resources request and limits as they are requested by the service container and only 1 container in a pod can request TPU.' + ) + delete container.resources.requests['google.com/tpu'] + if ( + container.resources.limits && + container.resources.limits['google.com/tpu'] + ) { + core.debug('removing tpu from main container resource limits') + delete container.resources.limits['google.com/tpu'] + } + } + } + return serviceContainers +} + // Create JobSet and waits for it to come online async function prepareJobSet( args: PrepareJobArgs, @@ -356,11 +400,20 @@ export function createContainerSpec( } podContainer.env = [] - for (const [key, value] of Object.entries( - container['environmentVariables'] - )) { - if (value && key !== 'HOME') { - podContainer.env.push({ name: key, value: value as string }) + if (container['environmentVariables']) { + for (const [key, value] of Object.entries( + container['environmentVariables'] + )) { + if (value && key !== 'HOME') { + podContainer.env.push({ name: key, value: value as string }) + } + } + + if (!('CI' in container['environmentVariables'])) { + podContainer.env.push({ + name: 'CI', + value: 'true' + }) } } @@ -369,13 +422,6 @@ export function createContainerSpec( value: 'true' }) - if (!('CI' in container['environmentVariables'])) { - podContainer.env.push({ - name: 'CI', - value: 'true' - }) - } - podContainer.volumeMounts = containerVolumes( container.userMountVolumes, jobContainer diff --git a/packages/k8s/tests/prepare-job-test.ts b/packages/k8s/tests/prepare-job-test.ts index a9ee70a1..67901664 100644 --- a/packages/k8s/tests/prepare-job-test.ts +++ b/packages/k8s/tests/prepare-job-test.ts @@ -1,18 +1,20 @@ import * as fs from 'fs' import * as path from 'path' import { cleanupJob } from '../src/hooks' -import { createContainerSpec, prepareJob } from '../src/hooks/prepare-job' +import { + createContainerSpec, + prepareJob, + processServiceContainers +} from '../src/hooks/prepare-job' import { TestHelper } from './test-setup' import { ENV_HOOK_TEMPLATE_PATH, ENV_NUMBER_OF_HOSTS, ENV_USE_KUBE_SCHEDULER, - generateContainerName, - readExtensionFromFile + generateContainerName } from '../src/k8s/utils' -import { getEvents, getPodByName } from '../src/k8s' +import { getPodByName } from '../src/k8s' import { V1Container } from '@kubernetes/client-node' -import * as yaml from 'js-yaml' import { JOB_CONTAINER_NAME } from '../src/hooks/constants' jest.useRealTimers() @@ -324,3 +326,62 @@ describe('Prepare job', () => { } ) }) + +describe('processServiceContainers', () => { + it('generate names for service containers', () => { + expect( + processServiceContainers( + [ + { + image: 'gcr.io/server' + }, + { + image: 'gcr.io/server' + } + ], + { + name: 'nginx', + image: 'nginx:latest', + imagePullPolicy: 'IfNotPresent' + } as V1Container + ) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'server' }), + expect.objectContaining({ name: 'server-1' }) + ]) + ) + }) + + it('generate TPU request for service containers', () => { + expect( + processServiceContainers( + [ + { + image: 'gcr.io/server', + createOptions: '--tpu=4' + } + ], + { + name: 'nginx', + image: 'nginx:latest', + imagePullPolicy: 'IfNotPresent' + } as V1Container + ) + ).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + name: 'server', + resources: { + limits: { + 'google.com/tpu': '4' + }, + requests: { + 'google.com/tpu': '4' + } + } + }) + ]) + ) + }) +})