Skip to content

Commit 4f7ddef

Browse files
committed
Add model card for ESM and PLS models
1 parent d094419 commit 4f7ddef

23 files changed

Lines changed: 1281 additions & 1298 deletions

File tree

benchmark/supervised/aws/dvc.lock

Lines changed: 36 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ stages:
55
- aws s3 cp ../../../datasets/ s3://mango-pe/pg2-benchmark-training-data/datasets
66
--recursive --exclude "*" --include "*.zip"
77
- aws s3 cp ../../../models/ s3://mango-pe/pg2-benchmark-training-data/models
8-
--recursive --exclude "*" --include "*.toml"
8+
--recursive --exclude "*" --include "README.md"
99
- echo "Upload completed at $(date)" > logs/s3_upload_complete.txt
1010
deps:
1111
- path: ../../../datasets/
@@ -15,25 +15,25 @@ stages:
1515
nfiles: 7
1616
- path: ../../../models/
1717
hash: md5
18-
md5: d3ad141f0c54799e8f96bd640c686b79.dir
19-
size: 1381199847
20-
nfiles: 48393
18+
md5: 8a2a310671abbb469d6d45d56c1ec859.dir
19+
size: 1381200239
20+
nfiles: 48391
2121
- path: logs/setup.txt
2222
hash: md5
2323
md5: c861b0c201d5e28492f5dff8fab634f5
2424
size: 26
2525
outs:
2626
- path: logs/s3_upload_complete.txt
2727
hash: md5
28-
md5: b841533c85ad082aeed20da08556f9cc
28+
md5: 2de978e81f106363692c3c775ae44ecb
2929
size: 50
3030
deploy_to_ecr@model0:
3131
cmd:
3232
- aws ecr describe-repositories --repository-names pls --region us-east-1 >/dev/null
3333
2>&1 || aws ecr create-repository --repository-name pls --region us-east-1 >/dev/null
3434
- aws ecr get-login-password --region us-east-1 | docker login --username AWS
3535
--password-stdin ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com
36-
- docker buildx build --build-arg GIT_CACHE_BUST=1 --platform linux/amd64,linux/arm64
36+
- docker buildx build --build-arg GIT_CACHE_BUST=2 --platform linux/amd64,linux/arm64
3737
--secret id=git_auth,src=../git-auth.txt -f ../../../models/pls/Dockerfile -t
3838
${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/pls:latest ../../.. --push
3939
- echo "ECR push completed at $(date)" > logs/ecr_push_complete.txt
@@ -50,104 +50,104 @@ stages:
5050
outs:
5151
- path: logs/ecr_push_complete.txt
5252
hash: md5
53-
md5: e59aeee19986bf50446a104090b14741
53+
md5: 8225867a9f663498ab8e7812c97e0d9a
5454
size: 52
5555
- path: logs/image_uri.txt
5656
hash: md5
5757
md5: 764fdb00245735eff50661b2da0d51a1
5858
size: 56
5959
create_training_job@dataset0-model0:
6060
cmd: "uv run pg2-benchmark sagemaker create-training-job --model-name pls --region-name
61-
us-east-1 --sagemaker-role-name rnd-dev-sagemakerexecutionrole --ecr-repository-uri
61+
us-east-1 --sagemaker-role-name rnd-dev-sagemakerexecutionrole --ecr-repository-uri
6262
${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/pls --s3-training-data-prefix
6363
mango-pe/pg2-benchmark-training-data --s3-output-prefix mango-pe/pg2-benchmark-output
6464
--instance-type ml.m5.xlarge --volume-size 5 --dataset-prefix dummy --model-prefix
6565
pls > logs/create_job_charge_ladder_pls.txt\n"
6666
deps:
6767
- path: ../../../src/pg2_benchmark/cli/sagemaker.py
6868
hash: md5
69-
md5: 51bdf7c32397e96258c0b0a5eb2e9759
70-
size: 5176
69+
md5: 47ce56161a649f4829dd64c6bde4b4bf
70+
size: 5178
7171
- path: logs/ecr_push_complete.txt
7272
hash: md5
73-
md5: e59aeee19986bf50446a104090b14741
73+
md5: 8225867a9f663498ab8e7812c97e0d9a
7474
size: 52
7575
- path: logs/image_uri.txt
7676
hash: md5
7777
md5: 764fdb00245735eff50661b2da0d51a1
7878
size: 56
7979
- path: logs/s3_upload_complete.txt
8080
hash: md5
81-
md5: b841533c85ad082aeed20da08556f9cc
81+
md5: 2de978e81f106363692c3c775ae44ecb
8282
size: 50
8383
outs:
8484
- path: logs/create_job_charge_ladder_pls.txt
8585
hash: md5
86-
md5: a746456fd6bf63aa1dc9998e9bc74ed0
86+
md5: 9840699441bb4a04e370720b50541308
8787
size: 20
8888
create_training_job@dataset1-model0:
8989
cmd: "uv run pg2-benchmark sagemaker create-training-job --model-name pls --region-name
90-
us-east-1 --sagemaker-role-name rnd-dev-sagemakerexecutionrole --ecr-repository-uri
90+
us-east-1 --sagemaker-role-name rnd-dev-sagemakerexecutionrole --ecr-repository-uri
9191
${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/pls --s3-training-data-prefix
9292
mango-pe/pg2-benchmark-training-data --s3-output-prefix mango-pe/pg2-benchmark-output
9393
--instance-type ml.m5.xlarge --volume-size 5 --dataset-prefix neime --model-prefix
9494
pls > logs/create_job_neime_pls.txt\n"
9595
deps:
9696
- path: ../../../src/pg2_benchmark/cli/sagemaker.py
9797
hash: md5
98-
md5: 51bdf7c32397e96258c0b0a5eb2e9759
99-
size: 5176
98+
md5: 47ce56161a649f4829dd64c6bde4b4bf
99+
size: 5178
100100
- path: logs/ecr_push_complete.txt
101101
hash: md5
102-
md5: e59aeee19986bf50446a104090b14741
102+
md5: 8225867a9f663498ab8e7812c97e0d9a
103103
size: 52
104104
- path: logs/image_uri.txt
105105
hash: md5
106106
md5: 764fdb00245735eff50661b2da0d51a1
107107
size: 56
108108
- path: logs/s3_upload_complete.txt
109109
hash: md5
110-
md5: b841533c85ad082aeed20da08556f9cc
110+
md5: 2de978e81f106363692c3c775ae44ecb
111111
size: 50
112112
outs:
113113
- path: logs/create_job_neime_pls.txt
114114
hash: md5
115-
md5: bc8eb0dd0c04cc015516c0237401a538
115+
md5: b2760cf024c3f96ce24798aa7566f8c0
116116
size: 20
117117
monitor_training_job@dataset0-model0:
118118
cmd: "uv run pg2-benchmark sagemaker monitor-training-job --region-name us-east-1
119119
--job-name $(cat logs/create_job_charge_ladder_pls.txt) > logs/monitor_job_charge_ladder_pls.txt\n"
120120
deps:
121121
- path: ../../../src/pg2_benchmark/cli/sagemaker.py
122122
hash: md5
123-
md5: 51bdf7c32397e96258c0b0a5eb2e9759
124-
size: 5176
123+
md5: 47ce56161a649f4829dd64c6bde4b4bf
124+
size: 5178
125125
- path: logs/create_job_charge_ladder_pls.txt
126126
hash: md5
127-
md5: a746456fd6bf63aa1dc9998e9bc74ed0
127+
md5: 9840699441bb4a04e370720b50541308
128128
size: 20
129129
outs:
130130
- path: logs/monitor_job_charge_ladder_pls.txt
131131
hash: md5
132-
md5: ebce6202916ab76ffc188be21d7222ae
133-
size: 158
132+
md5: b83ea2ee7bc48007cd5f240683cdc043
133+
size: 245
134134
monitor_training_job@dataset1-model0:
135135
cmd: "uv run pg2-benchmark sagemaker monitor-training-job --region-name us-east-1
136136
--job-name $(cat logs/create_job_neime_pls.txt) > logs/monitor_job_neime_pls.txt\n"
137137
deps:
138138
- path: ../../../src/pg2_benchmark/cli/sagemaker.py
139139
hash: md5
140-
md5: 51bdf7c32397e96258c0b0a5eb2e9759
141-
size: 5176
140+
md5: 47ce56161a649f4829dd64c6bde4b4bf
141+
size: 5178
142142
- path: logs/create_job_neime_pls.txt
143143
hash: md5
144-
md5: bc8eb0dd0c04cc015516c0237401a538
144+
md5: b2760cf024c3f96ce24798aa7566f8c0
145145
size: 20
146146
outs:
147147
- path: logs/monitor_job_neime_pls.txt
148148
hash: md5
149-
md5: 1b1592af89d391864d56058b621d2d4c
150-
size: 245
149+
md5: 930f15c1dc41b2d72f0d24c430dd4609
150+
size: 216
151151
calculate_metric@dataset0-model0:
152152
cmd:
153153
- aws s3 cp s3://mango-pe/pg2-benchmark-output/$(cat logs/create_job_charge_ladder_pls.txt)/output/model.tar.gz
@@ -159,12 +159,12 @@ stages:
159159
deps:
160160
- path: logs/monitor_job_charge_ladder_pls.txt
161161
hash: md5
162-
md5: ebce6202916ab76ffc188be21d7222ae
163-
size: 158
162+
md5: b83ea2ee7bc48007cd5f240683cdc043
163+
size: 245
164164
outs:
165165
- path: metric/charge_ladder_pls.csv
166166
hash: md5
167-
md5: a60602f67fbd13b9da7ba2535995e97e
167+
md5: a94b29b80493faa875ea39623058344b
168168
size: 1457
169169
calculate_metric@dataset1-model0:
170170
cmd:
@@ -177,13 +177,13 @@ stages:
177177
deps:
178178
- path: logs/monitor_job_neime_pls.txt
179179
hash: md5
180-
md5: 1b1592af89d391864d56058b621d2d4c
181-
size: 245
180+
md5: 930f15c1dc41b2d72f0d24c430dd4609
181+
size: 216
182182
outs:
183183
- path: metric/neime_pls.csv
184184
hash: md5
185-
md5: b3475d5df5e71acd2bcdd5854f5640d3
186-
size: 1626
185+
md5: 43a6a51e0ff20d2a322412eac6e524bb
186+
size: 1616
187187
setup:
188188
cmd:
189189
- mkdir -p logs output metric

benchmark/supervised/aws/dvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ stages:
2525
upload_to_s3:
2626
cmd:
2727
- aws s3 cp ${source.datasets_dir}/ s3://${aws.s3_training_data_prefix}/datasets --recursive --exclude "*" --include "*.zip"
28-
- aws s3 cp ${source.models_dir}/ s3://${aws.s3_training_data_prefix}/models --recursive --exclude "*" --include "*.toml"
28+
- aws s3 cp ${source.models_dir}/ s3://${aws.s3_training_data_prefix}/models --recursive --exclude "*" --include "README.md"
2929
- echo "Upload completed at $(date)" > logs/s3_upload_complete.txt
3030
deps:
3131
- logs/setup.txt

benchmark/supervised/local/dvc.lock

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ stages:
66
-f ../../../models/pls/Dockerfile -t pls:latest ../../..
77
- docker run --rm -v $(realpath ../../../datasets):/datasets -v $(realpath ../../../models):/models
88
-v $(realpath output):/opt/ml/model pls:latest train --dataset-file /datasets/dummy/dataset.zip
9-
--model-toml-file /models/pls/manifest.toml
9+
--model-card-file /models/pls/README.md
1010
- docker image prune -a -f
1111
deps:
1212
- path: ../../../datasets/dummy/dataset.zip
@@ -17,26 +17,26 @@ stages:
1717
hash: md5
1818
md5: 901a42e51cf4a14665099985ee9c30de
1919
size: 1138
20-
- path: ../../../models/pls/manifest.toml
20+
- path: ../../../models/pls/README.md
2121
hash: md5
22-
md5: 751569f65bbbc4d796ddc02a70dce809
23-
size: 185
22+
md5: 9fad88f2cb03369789d7143c7ee4844b
23+
size: 837
2424
- path: logs/setup.txt
2525
hash: md5
2626
md5: c861b0c201d5e28492f5dff8fab634f5
2727
size: 26
2828
outs:
2929
- path: output/charge_ladder_pls.csv
3030
hash: md5
31-
md5: 24c58ef7387c61823caab5308d78c3ba
32-
size: 13770
31+
md5: 2331ac53475416456e701ecdf6ee51b5
32+
size: 13780
3333
create_training_job@dataset1-model0:
3434
cmd:
3535
- docker build --build-arg GIT_CACHE_BUST=1 --secret id=git_auth,src=../git-auth.txt
3636
-f ../../../models/pls/Dockerfile -t pls:latest ../../..
3737
- docker run --rm -v $(realpath ../../../datasets):/datasets -v $(realpath ../../../models):/models
3838
-v $(realpath output):/opt/ml/model pls:latest train --dataset-file /datasets/neime/dataset.zip
39-
--model-toml-file /models/pls/manifest.toml
39+
--model-card-file /models/pls/README.md
4040
- docker image prune -a -f
4141
deps:
4242
- path: ../../../datasets/neime/dataset.zip
@@ -47,45 +47,45 @@ stages:
4747
hash: md5
4848
md5: 901a42e51cf4a14665099985ee9c30de
4949
size: 1138
50-
- path: ../../../models/pls/manifest.toml
50+
- path: ../../../models/pls/README.md
5151
hash: md5
52-
md5: 751569f65bbbc4d796ddc02a70dce809
53-
size: 185
52+
md5: 9fad88f2cb03369789d7143c7ee4844b
53+
size: 837
5454
- path: logs/setup.txt
5555
hash: md5
5656
md5: c861b0c201d5e28492f5dff8fab634f5
5757
size: 26
5858
outs:
5959
- path: output/neime_pls.csv
6060
hash: md5
61-
md5: ea6cb8d915611f962ee44dbf07bbe086
62-
size: 35152
61+
md5: 0ba5399bd48be7053fd80f12065daf08
62+
size: 35028
6363
calculate_metric@dataset0-model0:
6464
cmd: uv run pg2-benchmark metric calc --output-path output/charge_ladder_pls.csv
6565
--metric-path metric/charge_ladder_pls.csv
6666
deps:
6767
- path: output/charge_ladder_pls.csv
6868
hash: md5
69-
md5: 24c58ef7387c61823caab5308d78c3ba
70-
size: 13770
69+
md5: 2331ac53475416456e701ecdf6ee51b5
70+
size: 13780
7171
outs:
7272
- path: metric/charge_ladder_pls.csv
7373
hash: md5
74-
md5: 82e59e4f463f6613ab5dffe732f09e59
74+
md5: 566553f3fe5f4eca170de4a58d60e007
7575
size: 1457
7676
calculate_metric@dataset1-model0:
7777
cmd: uv run pg2-benchmark metric calc --output-path output/neime_pls.csv --metric-path
7878
metric/neime_pls.csv
7979
deps:
8080
- path: output/neime_pls.csv
8181
hash: md5
82-
md5: ea6cb8d915611f962ee44dbf07bbe086
83-
size: 35152
82+
md5: 0ba5399bd48be7053fd80f12065daf08
83+
size: 35028
8484
outs:
8585
- path: metric/neime_pls.csv
8686
hash: md5
87-
md5: 54bba84b8baed2ab0bdf634da18c2747
88-
size: 1620
87+
md5: 64464e0f19baeb76e4921e583a991427
88+
size: 1623
8989
setup:
9090
cmd:
9191
- mkdir -p logs output metric

benchmark/supervised/local/dvc.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ vars:
1212

1313
- models:
1414
- name: pls
15-
container_path: /models/pls/manifest.toml
16-
local_path: ../../../models/pls/manifest.toml
15+
container_path: /models/pls/README.md
16+
local_path: ../../../models/pls/README.md
1717
dockerfile: ../../../models/pls/Dockerfile
1818

1919
stages:
@@ -32,7 +32,7 @@ stages:
3232

3333
cmd:
3434
- docker build --build-arg GIT_CACHE_BUST=${git.git_cache_bust} --secret id=git_auth,src=../git-auth.txt -f ${item.model.dockerfile} -t ${item.model.name}:latest ../../..
35-
- docker run --rm -v $(realpath ${source.datasets_dir}):/datasets -v $(realpath ${source.models_dir}):/models -v $(realpath ${destination.output_dir}):/opt/ml/model ${item.model.name}:latest train --dataset-file ${item.dataset.container_path} --model-toml-file ${item.model.container_path}
35+
- docker run --rm -v $(realpath ${source.datasets_dir}):/datasets -v $(realpath ${source.models_dir}):/models -v $(realpath ${destination.output_dir}):/opt/ml/model ${item.model.name}:latest train --dataset-file ${item.dataset.container_path} --model-card-file ${item.model.container_path}
3636
- docker image prune -a -f
3737

3838
deps:

benchmark/zero_shot/aws/dvc.lock

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ stages:
55
- aws s3 cp ../../../datasets/ s3://mango-pe/pg2-benchmark-training-data/datasets
66
--recursive --exclude "*" --include "*.zip"
77
- aws s3 cp ../../../models/ s3://mango-pe/pg2-benchmark-training-data/models
8-
--recursive --exclude "*" --include "*.toml"
8+
--recursive --exclude "*" --include "README.md"
99
- echo "Upload completed at $(date)" > logs/s3_upload_complete.txt
1010
deps:
1111
- path: ../../../datasets/
@@ -15,17 +15,17 @@ stages:
1515
nfiles: 7
1616
- path: ../../../models/
1717
hash: md5
18-
md5: d3ad141f0c54799e8f96bd640c686b79.dir
19-
size: 1381199847
20-
nfiles: 48393
18+
md5: 8a2a310671abbb469d6d45d56c1ec859.dir
19+
size: 1381200239
20+
nfiles: 48391
2121
- path: logs/setup.txt
2222
hash: md5
2323
md5: c861b0c201d5e28492f5dff8fab634f5
2424
size: 26
2525
outs:
2626
- path: logs/s3_upload_complete.txt
2727
hash: md5
28-
md5: b841533c85ad082aeed20da08556f9cc
28+
md5: 2de978e81f106363692c3c775ae44ecb
2929
size: 50
3030
setup:
3131
cmd:

benchmark/zero_shot/aws/dvc.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ stages:
2222
upload_to_s3:
2323
cmd:
2424
- aws s3 cp ${source.datasets_dir}/ s3://${aws.s3_training_data_prefix}/datasets --recursive --exclude "*" --include "*.zip"
25-
- aws s3 cp ${source.models_dir}/ s3://${aws.s3_training_data_prefix}/models --recursive --exclude "*" --include "*.toml"
25+
- aws s3 cp ${source.models_dir}/ s3://${aws.s3_training_data_prefix}/models --recursive --exclude "*" --include "README.md"
2626
- echo "Upload completed at $(date)" > logs/s3_upload_complete.txt
2727
deps:
2828
- logs/setup.txt

0 commit comments

Comments
 (0)