-
Notifications
You must be signed in to change notification settings - Fork 0
Remove TPU requests and limits from the main container if a service container requests them #41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Remove TPU requests and limits from the main container if a service container requests them #41
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,7 @@ import { | |
| JobContainerInfo, | ||
| ContextPorts, | ||
| PrepareJobArgs, | ||
| ServiceContainerInfo, | ||
| writeToResponseFile | ||
| } from 'hooklib' | ||
| import path from 'path' | ||
|
|
@@ -69,20 +70,11 @@ export async function prepareJob( | |
| ) | ||
| } | ||
|
|
||
| let services: k8s.V1Container[] = [] | ||
| if (args.services?.length) { | ||
| generateServicesName(args.services) | ||
| services = args.services.map(service => { | ||
| core.debug(`Adding service '${service.image}' to pod definition`) | ||
| return createContainerSpec( | ||
| service, | ||
| generateContainerName(service.image), | ||
| false, | ||
| extension, | ||
| service.createOptions | ||
| ) | ||
| }) | ||
| } | ||
| const services: k8s.V1Container[] = processServiceContainers( | ||
| args.services, | ||
| container, | ||
| extension | ||
| ) | ||
|
|
||
| if (!container && !services?.length) { | ||
| throw new Error('No containers exist, skipping hook invocation') | ||
|
|
@@ -152,6 +144,57 @@ export async function prepareJob( | |
| generateResponseFile(responseFile, args, createdPod, isAlpine) | ||
| } | ||
|
|
||
// Extended-resource key used to request TPUs on a container.
const TPU_RESOURCE_KEY = 'google.com/tpu'

/**
 * Builds the pod container specs for the requested service containers and
 * makes sure at most one container in the pod asks for TPUs.
 *
 * When exactly one service container declares a `google.com/tpu` limit, any
 * `google.com/tpu` entry is removed from the main container's
 * `resources.requests` and `resources.limits`. The two maps are handled
 * independently, so a main container that declares TPUs under `limits` only
 * is stripped as well.
 *
 * NOTE: `container.resources` is mutated in place.
 *
 * @param services service containers requested for the job; may be omitted
 * @param container spec of the main (job) container, if one exists
 * @param extension optional pod template passed through to
 *   `createContainerSpec`
 * @returns one container spec per service, or an empty array when no
 *   services were requested
 * @throws Error when more than one service container declares a
 *   `google.com/tpu` limit
 */
function processServiceContainers(
  services?: ServiceContainerInfo[],
  container?: k8s.V1Container,
  extension?: k8s.V1PodTemplateSpec
): k8s.V1Container[] {
  if (!services?.length) {
    return []
  }
  generateServicesName(services)
  const serviceContainers = services.map(service => {
    core.debug(`Adding service '${service.image}' to pod definition`)
    return createContainerSpec(
      service,
      generateContainerName(service.image),
      // presumably flags this as a non-job container; confirm against
      // createContainerSpec
      false,
      extension,
      service.createOptions
    )
  })

  const tpuRequestingContainers = services.filter(service =>
    Boolean(service.resources?.limits?.[TPU_RESOURCE_KEY])
  )

  if (tpuRequestingContainers.length > 1) {
    throw new Error(
      `${tpuRequestingContainers.length} containers request for TPU's. Only 1 container per pod can request for TPU's.`
    )
  }

  if (tpuRequestingContainers.length === 1 && container?.resources) {
    const { requests, limits } = container.resources
    const hasTpuRequest = Boolean(requests?.[TPU_RESOURCE_KEY])
    const hasTpuLimit = Boolean(limits?.[TPU_RESOURCE_KEY])

    // Check requests and limits separately: a TPU may be declared under
    // limits only, and it still has to be removed in that case.
    if (hasTpuRequest || hasTpuLimit) {
      core.debug(
        'removing tpu from main container resources request and limits as they are requested by the service container and only 1 container in a pod can request TPU.'
      )
      if (requests && hasTpuRequest) {
        delete requests[TPU_RESOURCE_KEY]
      }
      if (limits && hasTpuLimit) {
        delete limits[TPU_RESOURCE_KEY]
      }
      core.debug(
        'removed tpu from main container resources requests and limits.'
      )
    }
  }
  return serviceContainers
}
|
|
||
| // Create JobSet and waits for it to come online | ||
| async function prepareJobSet( | ||
| args: PrepareJobArgs, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.