File tree Expand file tree Collapse file tree 6 files changed +6
-35
lines changed Expand file tree Collapse file tree 6 files changed +6
-35
lines changed Original file line number Diff line number Diff line change 7
7
- name : gpu-container
8
8
# torch113_cuda117_ds076
9
9
# image: docker.io/deepspeed/deepspeed:v072_torch112_cu117
10
- image : docker.io/zihaokevinzhou/deepspeed:torch113_cuda117_ds076
10
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:torch113_cuda117_ds076
11
11
imagePullPolicy : Always
12
12
command : ["sleep", "infinity"]
13
13
volumeMounts :
34
34
operator : In
35
35
values :
36
36
- NVIDIA-GeForce-RTX-3090
37
- - key : kubernetes.io/hostname
38
- operator : In
39
- values :
40
- - k8s-3090-02.clemson.edu
41
37
volumes :
42
38
- name : stpp-vol
43
39
persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
5
5
kind : Job
6
6
metadata :
7
7
name : mnist
8
- namespace : deep-forecast
9
8
spec :
10
9
template :
11
10
spec :
12
11
containers :
13
12
- name : gpu-container
14
- image : docker.io/horovod/horovod:sha-811cf67
13
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
15
14
command : ["/bin/bash","-c"]
16
15
# NCCL_DEBUG=INFO
17
16
args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
41
40
operator : In
42
41
values :
43
42
- NVIDIA-GeForce-RTX-3090
44
- - key : kubernetes.io/hostname
45
- operator : In
46
- values :
47
- - k8s-3090-02.clemson.edu
48
43
volumes :
49
44
- name : stpp-vol
50
45
persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
5
5
kind : Job
6
6
metadata :
7
7
name : mnist
8
- namespace : deep-forecast
9
8
spec :
10
9
template :
11
10
spec :
12
11
containers :
13
12
- name : gpu-container
14
- image : horovod/horovod:sha-811cf67
13
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
15
14
command : ["/bin/bash","-c"]
16
15
# NCCL_DEBUG=INFO
17
16
args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
41
40
operator : In
42
41
values :
43
42
- NVIDIA-GeForce-RTX-3090
44
- - key : kubernetes.io/hostname
45
- operator : In
46
- values :
47
- - k8s-3090-02.clemson.edu
48
43
volumes :
49
44
- name : stpp-vol
50
45
persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
5
5
kind : Job
6
6
metadata :
7
7
name : mnist
8
- namespace : deep-forecast
9
8
spec :
10
9
template :
11
10
spec :
12
11
containers :
13
12
- name : gpu-container
14
- image : docker.io/zihaokevinzhou/deepspeed:torch113_cuda117_ds076
13
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:torch113_cuda117_ds076
15
14
command : ["/bin/bash","-c"]
16
15
# NCCL_DEBUG=INFO
17
16
args : ['git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
41
40
operator : In
42
41
values :
43
42
- NVIDIA-GeForce-RTX-3090
44
- - key : kubernetes.io/hostname
45
- operator : In
46
- values :
47
- - k8s-3090-02.clemson.edu
48
43
volumes :
49
44
- name : stpp-vol
50
45
persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
5
5
kind : Job
6
6
metadata :
7
7
name : mnist
8
- namespace : deep-forecast
9
8
spec :
10
9
template :
11
10
spec :
12
11
containers :
13
12
- name : gpu-container
14
- image : horovod/horovod:sha-811cf67
13
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
15
14
command : ["/bin/bash","-c"]
16
15
# NCCL_DEBUG=INFO
17
16
args : ['git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
43
42
operator : In
44
43
values :
45
44
- NVIDIA-GeForce-RTX-3090
46
- - key : kubernetes.io/hostname
47
- operator : In
48
- values :
49
- - k8s-3090-02.clemson.edu
50
45
volumes :
51
46
- name : stpp-vol
52
47
persistentVolumeClaim :
Original file line number Diff line number Diff line change @@ -5,13 +5,12 @@ apiVersion: batch/v1
5
5
kind : Job
6
6
metadata :
7
7
name : mnist
8
- namespace : deep-forecast
9
8
spec :
10
9
template :
11
10
spec :
12
11
containers :
13
12
- name : gpu-container
14
- image : docker.io/horovod/horovod:sha-811cf67
13
+ image : gitlab-registry.nrp-nautilus.io/zihaozhou/nautilus_tutorial:hovorod
15
14
command : ["/bin/bash","-c"]
16
15
# NCCL_DEBUG=INFO
17
16
args : ["git clone https://github.com/Rose-STL-Lab/nautilus_tutorial.git;
41
40
operator : In
42
41
values :
43
42
- NVIDIA-GeForce-RTX-3090
44
- - key : kubernetes.io/hostname
45
- operator : In
46
- values :
47
- - k8s-3090-02.clemson.edu
48
43
volumes :
49
44
- name : stpp-vol
50
45
persistentVolumeClaim :
You can’t perform that action at this time.
0 commit comments