Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 72 additions & 26 deletions packages/k8s/src/hooks/prepare-job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
JobContainerInfo,
ContextPorts,
PrepareJobArgs,
ServiceContainerInfo,
writeToResponseFile
} from 'hooklib'
import path from 'path'
Expand Down Expand Up @@ -69,20 +70,11 @@ export async function prepareJob(
)
}

let services: k8s.V1Container[] = []
if (args.services?.length) {
generateServicesName(args.services)
services = args.services.map(service => {
core.debug(`Adding service '${service.image}' to pod definition`)
return createContainerSpec(
service,
generateContainerName(service.image),
false,
extension,
service.createOptions
)
})
}
const services: k8s.V1Container[] = processServiceContainers(
args.services,
container,
extension
)

if (!container && !services?.length) {
throw new Error('No containers exist, skipping hook invocation')
Expand Down Expand Up @@ -152,6 +144,58 @@ export async function prepareJob(
generateResponseFile(responseFile, args, createdPod, isAlpine)
}

/**
 * Builds the pod container specs for a job's service containers and checks
 * that at most one service asks for the `google.com/tpu` resource.
 *
 * @param services - Service definitions from the hook arguments. When this is
 *   missing or empty, an empty array is returned and nothing else is done.
 * @param container - The main container of the pod. It is mutated in place:
 *   when exactly one service has a `google.com/tpu` limit and this container
 *   has a `google.com/tpu` request, that request is deleted (and the matching
 *   limit too, if one is set).
 * @param extension - Optional pod template, passed through unchanged to
 *   `createContainerSpec`.
 * @returns One container spec per entry in `services`, in the same order.
 * @throws Error when more than one service has a `google.com/tpu` limit.
 */
export function processServiceContainers(
services?: ServiceContainerInfo[],
container?: k8s.V1Container,
extension?: k8s.V1PodTemplateSpec
): k8s.V1Container[] {
// Nothing to build when no services were requested.
if (!services?.length) {
return []
}
// NOTE(review): presumably fills in a unique `name` on every service before
// `service.name` is read below — confirm against generateServicesName.
generateServicesName(services)
const serviceContainers = services.map(service => {
core.debug(`Adding service '${service.image}' to pod definition`)
return createContainerSpec(
service,
service.name,
false,
extension,
service.createOptions
)
})

// Services whose resource limits carry a truthy 'google.com/tpu' entry.
// The check reads the incoming service definitions, not the specs built above.
const tpuRequestingContainers = services.filter(
service =>
service.resources?.limits && service.resources.limits['google.com/tpu']
)

// More than one TPU-requesting service is rejected outright.
if (tpuRequestingContainers.length > 1) {
throw new Error(
`${tpuRequestingContainers.length} containers request for TPU's. Only 1 container per pod can request for TPU's.`
)
}

// Exactly one service wants the TPU: strip the TPU entry from the main
// container so that the service is the only container asking for it.
if (tpuRequestingContainers.length === 1) {
if (
container?.resources?.requests &&
container.resources.requests['google.com/tpu']
) {
core.debug(
'removing tpu from main container resources request and limits as they are requested by the service container and only 1 container in a pod can request TPU.'
)
delete container.resources.requests['google.com/tpu']
// NOTE(review): the limit is only removed inside the requests check above,
// so a main container with a TPU limit but no TPU request keeps its limit —
// confirm this is intended.
if (
container.resources.limits &&
container.resources.limits['google.com/tpu']
) {
core.debug('removing tpu from main container resource limits')
delete container.resources.limits['google.com/tpu']
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Let's also add a log for indicating the delete operation was finished successfully for debugging?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I don't see it's added yet. Maybe the latest commit hasn't been uploaded?

}
}
return serviceContainers
}

// Creates the JobSet and waits for it to come online
async function prepareJobSet(
args: PrepareJobArgs,
Expand Down Expand Up @@ -356,11 +400,20 @@ export function createContainerSpec(
}

podContainer.env = []
for (const [key, value] of Object.entries(
container['environmentVariables']
)) {
if (value && key !== 'HOME') {
podContainer.env.push({ name: key, value: value as string })
if (container['environmentVariables']) {
for (const [key, value] of Object.entries(
container['environmentVariables']
)) {
if (value && key !== 'HOME') {
podContainer.env.push({ name: key, value: value as string })
}
}

if (!('CI' in container['environmentVariables'])) {
podContainer.env.push({
name: 'CI',
value: 'true'
})
}
}

Expand All @@ -369,13 +422,6 @@ export function createContainerSpec(
value: 'true'
})

if (!('CI' in container['environmentVariables'])) {
podContainer.env.push({
name: 'CI',
value: 'true'
})
}

podContainer.volumeMounts = containerVolumes(
container.userMountVolumes,
jobContainer
Expand Down
71 changes: 66 additions & 5 deletions packages/k8s/tests/prepare-job-test.ts
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import * as fs from 'fs'
import * as path from 'path'
import { cleanupJob } from '../src/hooks'
import { createContainerSpec, prepareJob } from '../src/hooks/prepare-job'
import {
createContainerSpec,
prepareJob,
processServiceContainers
} from '../src/hooks/prepare-job'
import { TestHelper } from './test-setup'
import {
ENV_HOOK_TEMPLATE_PATH,
ENV_NUMBER_OF_HOSTS,
ENV_USE_KUBE_SCHEDULER,
generateContainerName,
readExtensionFromFile
generateContainerName
} from '../src/k8s/utils'
import { getEvents, getPodByName } from '../src/k8s'
import { getPodByName } from '../src/k8s'
import { V1Container } from '@kubernetes/client-node'
import * as yaml from 'js-yaml'
import { JOB_CONTAINER_NAME } from '../src/hooks/constants'

jest.useRealTimers()
Expand Down Expand Up @@ -324,3 +326,62 @@ describe('Prepare job', () => {
}
)
})

// Suite for processServiceContainers: service naming and TPU resources
// derived from a service's createOptions.
describe('processServiceContainers', () => {
  // Hands every test its own main-container object so nothing is shared
  // between cases.
  const makeMainContainer = (): V1Container =>
    ({
      name: 'nginx',
      image: 'nginx:latest',
      imagePullPolicy: 'IfNotPresent'
    } as V1Container)

  it('generate names for service containers', () => {
    // Two services built from the same image must end up with distinct names.
    const expectedSpecs = ['server', 'server-1'].map(name =>
      expect.objectContaining({ name })
    )

    const builtSpecs = processServiceContainers(
      [{ image: 'gcr.io/server' }, { image: 'gcr.io/server' }],
      makeMainContainer()
    )

    expect(builtSpecs).toEqual(expect.arrayContaining(expectedSpecs))
  })

  it('generate TPU request for service containers', () => {
    // `--tpu=4` in createOptions is expected to show up as both a limit and a
    // request on the generated service container.
    const tpuQuantity = { 'google.com/tpu': '4' }
    const expectedSpec = expect.objectContaining({
      name: 'server',
      resources: {
        limits: { ...tpuQuantity },
        requests: { ...tpuQuantity }
      }
    })

    const builtSpecs = processServiceContainers(
      [{ image: 'gcr.io/server', createOptions: '--tpu=4' }],
      makeMainContainer()
    )

    expect(builtSpecs).toEqual(expect.arrayContaining([expectedSpec]))
  })
})
Loading