Skip to content

Commit f07f194

Browse files
authored
Gpu refactor (#4021)
1 parent 89d9a47 commit f07f194

File tree

8 files changed

+171
-75
lines changed

8 files changed

+171
-75
lines changed

dashboard/src/lib/porter-apps/services.ts

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ export const serviceValidator = z.object({
8282
cpuCores: serviceNumberValidator,
8383
ramMegabytes: serviceNumberValidator,
8484
gpuCoresNvidia: serviceNumberValidator,
85+
gpu: z.object({
86+
enabled: serviceBooleanValidator,
87+
gpuCoresNvidia: serviceNumberValidator,
88+
}),
8589
smartOptimization: serviceBooleanValidator.optional(),
8690
terminationGracePeriodSeconds: serviceNumberValidator.optional(),
8791
config: z.discriminatedUnion("type", [
@@ -117,6 +121,10 @@ export type SerializedService = {
117121
ramMegabytes: number;
118122
smartOptimization?: boolean;
119123
gpuCoresNvidia: number;
124+
gpu: {
125+
enabled: boolean;
126+
gpuCoresNvidia: number;
127+
};
120128
terminationGracePeriodSeconds?: number;
121129
config:
122130
| {
@@ -196,6 +204,10 @@ export function defaultSerialized({
196204
cpuCores: defaultCPU,
197205
ramMegabytes: defaultRAM,
198206
gpuCoresNvidia: 0,
207+
gpu: {
208+
enabled: false,
209+
gpuCoresNvidia: 0,
210+
},
199211
smartOptimization: true,
200212
};
201213

@@ -264,6 +276,10 @@ export function serializeService(service: ClientService): SerializedService {
264276
ramMegabytes: Math.round(service.ramMegabytes.value), // RAM must be an integer
265277
smartOptimization: service.smartOptimization?.value,
266278
gpuCoresNvidia: service.gpuCoresNvidia.value,
279+
gpu: {
280+
enabled: service.gpu.enabled.value,
281+
gpuCoresNvidia: service.gpu.gpuCoresNvidia.value,
282+
},
267283
terminationGracePeriodSeconds: service.terminationGracePeriodSeconds?.value,
268284
config: match(service.config)
269285
.with({ type: "web" }, (config) =>
@@ -336,6 +352,16 @@ export function deserializeService({
336352
instances: ServiceField.number(service.instances, override?.instances),
337353
port: ServiceField.number(service.port, override?.port),
338354
cpuCores: ServiceField.number(service.cpuCores, override?.cpuCores),
355+
gpu: {
356+
enabled: ServiceField.boolean(
357+
service.gpu?.enabled,
358+
// Chain through `gpu` optionally: the neighbouring lookups (`service.gpu?.…`,
// `override?.gpu?.gpuCoresNvidia`) already treat it as possibly missing, and a
// bare `.gpu.enabled` would throw when an override carries no `gpu` object.
override?.gpu?.enabled
359+
),
360+
gpuCoresNvidia: ServiceField.number(
361+
service.gpu?.gpuCoresNvidia,
362+
override?.gpu?.gpuCoresNvidia
363+
),
364+
},
339365
gpuCoresNvidia: ServiceField.number(
340366
service.gpuCoresNvidia,
341367
override?.gpuCoresNvidia

dashboard/src/main/home/app-dashboard/validate-apply/services-settings/ServiceContainer.tsx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ const ServiceContainer: React.FC<ServiceProps> = ({
154154
{service.name.value.trim().length > 0
155155
? service.name.value
156156
: "New Service"}
157-
{service.gpuCoresNvidia.value > 0 && (
157+
{service.gpu.enabled.value && (
158158
<>
159159
<Spacer inline x={1.5} />
160160
<TagContainer>
@@ -276,7 +276,7 @@ const ServiceHeader = styled.div<{
276276
border-radius: 20px;
277277
margin-left: -10px;
278278
transform: ${(props: { showExpanded?: boolean }) =>
279-
props.showExpanded ? "" : "rotate(-90deg)"};
279+
props.showExpanded ? "" : "rotate(-90deg)"};
280280
}
281281
`;
282282

dashboard/src/main/home/app-dashboard/validate-apply/services-settings/tabs/Resources.tsx

Lines changed: 31 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -227,27 +227,28 @@ const Resources: React.FC<ResourcesProps> = ({
227227
<>
228228
<Spacer y={1} />
229229
<Controller
230-
name={`app.services.${index}.gpuCoresNvidia`}
230+
name={`app.services.${index}.gpu`}
231231
control={control}
232232
render={({ field: { value, onChange } }) => (
233233
<>
234234
<Container row>
235235
<Switch
236236
size="small"
237237
color="primary"
238-
checked={value.value > 0}
238+
checked={value.enabled.value}
239239
disabled={!clusterContainsGPUNodes}
240240
onChange={() => {
241-
if (value.value > 0) {
242-
onChange({
243-
...value,
244-
value: 0,
245-
});
246-
} else
247-
onChange({
248-
...value,
249-
value: 1,
250-
});
241+
onChange({
242+
...value,
243+
enabled: {
244+
...value.enabled,
245+
value: !value.enabled.value,
246+
},
247+
gpuCoresNvidia: {
248+
...value.gpuCoresNvidia,
249+
value: value.enabled.value ? 0 : 1,
250+
}
251+
});
251252
}}
252253
inputProps={{ "aria-label": "controlled" }}
253254
/>
@@ -264,7 +265,7 @@ const Resources: React.FC<ResourcesProps> = ({
264265
Your cluster has no GPU nodes available.
265266
</Text>
266267
<Spacer inline x={0.5} />
267-
<Tag>
268+
{currentCluster.status !== "UPDATING" && <Tag>
268269
<Link
269270
onClick={() => {
270271
setClusterModalVisible(true);
@@ -273,7 +274,7 @@ const Resources: React.FC<ResourcesProps> = ({
273274
<TagIcon src={addCircle} />
274275
Add GPU nodes
275276
</Link>
276-
</Tag>
277+
</Tag>}
277278
</>
278279
)}
279280
</Container>
@@ -290,23 +291,22 @@ const Resources: React.FC<ResourcesProps> = ({
290291
</>
291292
)}
292293
/>
293-
{currentCluster.status === "UPDATING" &&
294-
clusterContainsGPUNodes && (
295-
<CheckItemContainer>
296-
<CheckItemTop>
297-
<Loading offset="0px" width="20px" height="20px" />
298-
<Spacer inline x={1} />
299-
<Text>{"Creating GPU nodes..."}</Text>
300-
<Spacer inline x={1} />
301-
<Tag>
302-
<Link to={`/cluster-dashboard`}>
303-
<TagIcon src={infra} />
304-
View Status
305-
</Link>
306-
</Tag>
307-
</CheckItemTop>
308-
</CheckItemContainer>
309-
)}
294+
{(currentCluster.status === "UPDATING" && !clusterContainsGPUNodes) && (
295+
<CheckItemContainer>
296+
<CheckItemTop>
297+
<Loading offset="0px" width="20px" height="20px" />
298+
<Spacer inline x={1} />
299+
<Text>{"Cluster is updating..."}</Text>
300+
<Spacer inline x={1} />
301+
<Tag>
302+
<Link to={`/cluster-dashboard`}>
303+
<TagIcon src={infra} />
304+
View Status
305+
</Link>
306+
</Tag>
307+
</CheckItemTop>
308+
</CheckItemContainer>
309+
)}
310310
</>
311311
)}
312312
{match(service.config)

internal/porter_app/test/parse_test.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ var result_nobuild = &porterv1.PorterApp{
5454
Port: 8080,
5555
CpuCores: 0.1,
5656
RamMegabytes: 256,
57+
Gpu: &porterv1.GPU{
58+
Enabled: false,
59+
GpuCoresNvidia: 0,
60+
},
5761
Config: &porterv1.Service_WebConfig{
5862
WebConfig: &porterv1.WebServiceConfig{
5963
Autoscaling: &porterv1.Autoscaling{
@@ -87,6 +91,10 @@ var result_nobuild = &porterv1.PorterApp{
8791
CpuCores: 0.1,
8892
RamMegabytes: 256,
8993
GpuCoresNvidia: 0,
94+
Gpu: &porterv1.GPU{
95+
Enabled: false,
96+
GpuCoresNvidia: 0,
97+
},
9098
Config: &porterv1.Service_WorkerConfig{
9199
WorkerConfig: &porterv1.WorkerServiceConfig{
92100
Autoscaling: nil,
@@ -100,6 +108,10 @@ var result_nobuild = &porterv1.PorterApp{
100108
CpuCores: 0.1,
101109
RamMegabytes: 256,
102110
GpuCoresNvidia: 0,
111+
Gpu: &porterv1.GPU{
112+
Enabled: false,
113+
GpuCoresNvidia: 0,
114+
},
103115
Config: &porterv1.Service_JobConfig{
104116
JobConfig: &porterv1.JobServiceConfig{
105117
AllowConcurrentOptional: pointer.Bool(true),
@@ -119,6 +131,10 @@ var result_nobuild = &porterv1.PorterApp{
119131
CpuCores: 0.1,
120132
RamMegabytes: 256,
121133
GpuCoresNvidia: 0,
134+
Gpu: &porterv1.GPU{
135+
Enabled: false,
136+
GpuCoresNvidia: 0,
137+
},
122138
Config: &porterv1.Service_WebConfig{
123139
WebConfig: &porterv1.WebServiceConfig{
124140
Autoscaling: &porterv1.Autoscaling{
@@ -152,6 +168,10 @@ var result_nobuild = &porterv1.PorterApp{
152168
CpuCores: 0.1,
153169
RamMegabytes: 256,
154170
GpuCoresNvidia: 0,
171+
Gpu: &porterv1.GPU{
172+
Enabled: false,
173+
GpuCoresNvidia: 0,
174+
},
155175
Config: &porterv1.Service_WorkerConfig{
156176
WorkerConfig: &porterv1.WorkerServiceConfig{
157177
Autoscaling: nil,
@@ -165,6 +185,10 @@ var result_nobuild = &porterv1.PorterApp{
165185
CpuCores: 0.1,
166186
RamMegabytes: 256,
167187
GpuCoresNvidia: 0,
188+
Gpu: &porterv1.GPU{
189+
Enabled: false,
190+
GpuCoresNvidia: 0,
191+
},
168192
Config: &porterv1.Service_JobConfig{
169193
JobConfig: &porterv1.JobServiceConfig{
170194
AllowConcurrentOptional: pointer.Bool(true),
@@ -182,6 +206,10 @@ var result_nobuild = &porterv1.PorterApp{
182206
CpuCores: 0,
183207
RamMegabytes: 0,
184208
GpuCoresNvidia: 0,
209+
Gpu: &porterv1.GPU{
210+
Enabled: false,
211+
GpuCoresNvidia: 0,
212+
},
185213
Config: &porterv1.Service_JobConfig{},
186214
Type: 3,
187215
},

internal/porter_app/testdata/v1_input_no_build_no_image.yaml

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ apps:
33
example-job:
44
type: job
55
run: echo 'hello world'
6+
gpu: {}
67
config:
78
allowConcurrent: true
89
resources:
@@ -11,52 +12,54 @@ apps:
1112
memory: 256Mi
1213
schedule:
1314
enabled: true
14-
value: '*/10 * * * *'
15+
value: "*/10 * * * *"
1516
paused: true
1617
cloudsql:
1718
enabled: false
18-
connectionName: ''
19-
dbPort: '5432'
20-
serviceAccountJSON: ''
19+
connectionName: ""
20+
dbPort: "5432"
21+
serviceAccountJSON: ""
2122
example-wkr:
2223
type: worker
2324
run: "echo 'work'"
25+
gpu: {}
2426
config:
25-
replicaCount: '1'
27+
replicaCount: "1"
2628
container:
27-
port: '80'
29+
port: "80"
2830
resources:
2931
requests:
3032
cpu: 100m
3133
memory: 256Mi
3234
autoscaling:
3335
enabled: false
34-
minReplicas: '1'
35-
maxReplicas: '10'
36-
targetCPUUtilizationPercentage: '50'
37-
targetMemoryUtilizationPercentage: '50'
36+
minReplicas: "1"
37+
maxReplicas: "10"
38+
targetCPUUtilizationPercentage: "50"
39+
targetMemoryUtilizationPercentage: "50"
3840
cloudsql:
3941
enabled: false
40-
connectionName: ''
41-
dbPort: '5432'
42-
serviceAccountJSON: ''
42+
connectionName: ""
43+
dbPort: "5432"
44+
serviceAccountJSON: ""
4345
example-web:
4446
type: web
4547
run: node index.js
48+
gpu: {}
4649
config:
47-
replicaCount: '0'
50+
replicaCount: "0"
4851
resources:
4952
requests:
5053
cpu: 100m
5154
memory: 256Mi
5255
container:
53-
port: '8080'
56+
port: "8080"
5457
autoscaling:
5558
enabled: true
56-
minReplicas: '1'
57-
maxReplicas: '3'
58-
targetCPUUtilizationPercentage: '60'
59-
targetMemoryUtilizationPercentage: '60'
59+
minReplicas: "1"
60+
maxReplicas: "3"
61+
targetCPUUtilizationPercentage: "60"
62+
targetMemoryUtilizationPercentage: "60"
6063
ingress:
6164
enabled: true
6265
custom_domain: true
@@ -66,30 +69,30 @@ apps:
6669
porter_hosts: []
6770
annotations:
6871
service:
69-
port: '8080'
72+
port: "8080"
7073
health:
7174
startupProbe:
7275
enabled: false
73-
failureThreshold: '3'
76+
failureThreshold: "3"
7477
path: /startupz
75-
periodSeconds: '5'
78+
periodSeconds: "5"
7679
readinessProbe:
7780
enabled: true
78-
failureThreshold: '3'
81+
failureThreshold: "3"
7982
path: /healthz
80-
initialDelaySeconds: '0'
83+
initialDelaySeconds: "0"
8184
livenessProbe:
8285
enabled: true
83-
failureThreshold: '3'
86+
failureThreshold: "3"
8487
path: /healthz
85-
periodSeconds: '5'
88+
periodSeconds: "5"
8689
cloudsql:
8790
enabled: false
88-
connectionName: ''
89-
dbPort: '5432'
90-
serviceAccountJSON: ''
91+
connectionName: ""
92+
dbPort: "5432"
93+
serviceAccountJSON: ""
9194
release:
9295
run: ls
9396
env:
94-
PORT: '8080'
95-
NODE_ENV: 'production'
97+
PORT: "8080"
98+
NODE_ENV: "production"

0 commit comments

Comments
 (0)