Skip to content

Commit e2f6879

Browse files
authored
Add dry run + generalise rclone download (#339)
* add dry run + generalise rclone download * [Automated Commit] Format Codebase [skip ci]
1 parent 1b9a32d commit e2f6879

File tree

16 files changed

+135
-42
lines changed

16 files changed

+135
-42
lines changed

script/download-and-extract/customize.py

+4
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ def preprocess(i):
5454
def postprocess(i):
5555

5656
env = i['env']
57+
58+
if env.get('MLC_DOWNLOAD_MODE') == "dry":
59+
return {'return': 0}
60+
5761
filepath = env.get('MLC_EXTRACT_EXTRACTED_PATH', '')
5862
if filepath == '':
5963
filepath = env.get('MLC_DOWNLOAD_DOWNLOADED_PATH', '')

script/download-and-extract/meta.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ posthook_deps:
3030
MLC_DAE_EXTRACT_DOWNLOADED:
3131
- 'yes'
3232
- 'True'
33+
skip_if_env:
34+
MLC_DOWNLOAD_MODE:
35+
- 'dry'
3336
names:
3437
- extract-script
3538
tags: extract,file

script/download-file/customize.py

+3
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,9 @@ def postprocess(i):
313313

314314
env = i['env']
315315

316+
if env.get('MLC_DOWNLOAD_MODE') == "dry":
317+
return {'return': 0}
318+
316319
filepath = env['MLC_DOWNLOAD_DOWNLOADED_PATH']
317320

318321
if not os.path.exists(filepath):

script/download-file/run.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ if [[ ${require_download} == 1 ]]; then
5050
fi
5151

5252
# Verify checksum again if necessary
53-
if [[ ${MLC_DOWNLOAD_TOOL} == "mlcutil" || ${require_download} == 1 ]]; then
53+
if [[ "${MLC_DOWNLOAD_MODE}" != "dry" && ( "${MLC_DOWNLOAD_TOOL}" == "mlcutil" || ${require_download} == 1 ) ]]; then
5454
if [[ -n "${MLC_DOWNLOAD_CHECKSUM_CMD}" ]]; then
5555
echo -e "\nVerifying checksum after download: ${MLC_DOWNLOAD_CHECKSUM_CMD}"
5656
eval "${MLC_DOWNLOAD_CHECKSUM_CMD}" || exit $?

script/get-dataset-waymo-calibration/customize.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,9 @@ def preprocess(i):
1414
if env.get('MLC_DATASET_WAYMO_CALIBRATION_PATH', '') != '':
1515
if not os.path.exists(env['MLC_DATASET_WAYMO_CALIBRATION_PATH']):
1616
return {
17-
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exists!"}
17+
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exist!"}
1818
else:
1919
env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"
20-
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
21-
i['run_script_input']['script_name'] = 'run-rclone'
22-
if env.get('MLC_OUTDIRNAME', '') != '':
23-
env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = env['MLC_OUTDIRNAME']
24-
else:
25-
env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = os.path.join(
26-
os.getcwd(), "kitti_format", "calibration")
2720

2821
return {'return': 0}
2922

script/get-dataset-waymo-calibration/meta.yaml

+30
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,33 @@ variations:
3333
- true
3434
force_cache: true
3535
tags: get,rclone-config,_waymo
36+
- enable_if_env:
37+
MLC_TMP_REQUIRE_DOWNLOAD:
38+
- 'yes'
39+
env:
40+
MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH
41+
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH
42+
MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format/testing
43+
extra_cache_tags: waymo,dataset
44+
force_cache: true
45+
names:
46+
- dae
47+
tags: download-and-extract
48+
force_env_keys:
49+
- MLC_OUTDIRNAME
50+
update_tags_from_env_with_prefix:
51+
_url.:
52+
- MLC_DOWNLOAD_URL
53+
rclone:
54+
group: download-tool
55+
add_deps_recursive:
56+
dae:
57+
tags: _rclone
58+
default: true
59+
dry-run:
60+
group: run-mode
61+
env:
62+
MLC_DOWNLOAD_MODE: dry
63+
dry-run,rclone:
64+
env:
65+
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run

script/get-dataset-waymo-calibration/run-rclone.sh

-4
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Unpack the downloaded calibration archives in place.
# Skipped for dry runs (MLC_DOWNLOAD_MODE=dry), where nothing is fetched.
# customize.py sets MLC_TMP_REQUIRE_DOWNLOAD to "yes" when a download is
# required; the earlier "true" spelling is still accepted so existing callers
# keep working.
if [[ "$MLC_DOWNLOAD_MODE" != "dry" && ( "$MLC_TMP_REQUIRE_DOWNLOAD" == "yes" || "$MLC_TMP_REQUIRE_DOWNLOAD" == "true" ) ]]; then
  cd "${MLC_DATASET_WAYMO_CALIBRATION_PATH}/testing" || exit
  for f in *.tar.gz; do
    # Without nullglob an unmatched pattern stays literal; do not hand it to tar.
    [[ -e "$f" ]] || continue
    # Stop on the first archive that fails to extract instead of continuing silently.
    tar -xzvf "$f" || exit $?
  done
  cd - || exit
fi

script/get-dataset-waymo/customize.py

-7
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,6 @@ def preprocess(i):
1717
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_PATH']} does not exists!"}
1818
else:
1919
env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"
20-
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
21-
i['run_script_input']['script_name'] = 'run-rclone'
22-
if env.get('MLC_OUTDIRNAME', '') != '':
23-
env['MLC_DATASET_WAYMO_PATH'] = env['MLC_OUTDIRNAME']
24-
else:
25-
env['MLC_DATASET_WAYMO_PATH'] = os.path.join(
26-
os.getcwd(), "kitti_format")
2720

2821
return {'return': 0}
2922

script/get-dataset-waymo/meta.yaml

+30
Original file line numberDiff line numberDiff line change
@@ -30,5 +30,35 @@ variations:
3030
enable_if_env:
3131
MLC_TMP_REQUIRE_DOWNLOAD:
3232
- yes
33+
- enable_if_env:
34+
MLC_TMP_REQUIRE_DOWNLOAD:
35+
- 'yes'
36+
env:
37+
MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH
38+
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH
39+
MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format
40+
extra_cache_tags: waymo,dataset
41+
force_cache: true
42+
names:
43+
- dae
44+
tags: download-and-extract
45+
force_env_keys:
46+
- MLC_OUTDIRNAME
47+
update_tags_from_env_with_prefix:
48+
_url.:
49+
- MLC_DOWNLOAD_URL
3350
env:
3451
MLC_DOWNLOAD_SRC: mlcommons
52+
rclone:
53+
group: download-tool
54+
add_deps_recursive:
55+
dae:
56+
tags: _rclone
57+
default: true
58+
dry-run:
59+
group: run-mode
60+
env:
61+
MLC_DOWNLOAD_MODE: dry
62+
dry-run,rclone:
63+
env:
64+
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run

script/get-dataset-waymo/run-rclone.sh

-7
This file was deleted.

script/get-dataset-waymo/run.sh

+6
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,9 @@
66
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out
77

88
#${MLC_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency
9+
10+
# Unpack the downloaded training archives in place.
# Skipped for dry runs (MLC_DOWNLOAD_MODE=dry), where nothing is fetched.
# customize.py sets MLC_TMP_REQUIRE_DOWNLOAD to "yes" when a download is
# required; the earlier "true" spelling is still accepted so existing callers
# keep working.
if [[ "$MLC_DOWNLOAD_MODE" != "dry" && ( "$MLC_TMP_REQUIRE_DOWNLOAD" == "yes" || "$MLC_TMP_REQUIRE_DOWNLOAD" == "true" ) ]]; then
  cd "${MLC_DATASET_WAYMO_PATH}/kitti_format/training" || exit
  for f in *.tar.gz; do
    # Without nullglob an unmatched pattern stays literal; do not hand it to tar.
    [[ -e "$f" ]] || continue
    # Stop on the first archive that fails to extract instead of continuing silently.
    tar -xzvf "$f" || exit $?
  done
  cd - || exit
fi

script/get-ml-model-llama3/customize.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,11 @@ def preprocess(i):
1515

1616
path = env.get('MLC_OUTDIRNAME', '').strip()
1717

18-
if path != "":
18+
if path != "" and env.get('MLC_DOWNLOAD_SRC', '') == "huggingface":
1919
os.makedirs(path, exist_ok=True)
2020
env['MLC_GIT_CHECKOUT_FOLDER'] = os.path.join(
2121
path, env['MLC_ML_MODEL_NAME'])
2222

23-
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
24-
i['run_script_input']['script_name'] = 'run-rclone'
25-
if env.get('MLC_OUTDIRNAME', '') != '':
26-
env['LLAMA3_CHECKPOINT_PATH'] = env['MLC_OUTDIRNAME']
27-
else:
28-
env['LLAMA3_CHECKPOINT_PATH'] = os.getcwd()
2923
env['MLC_TMP_REQUIRE_DOWNLOAD'] = 'yes'
3024

3125
return {'return': 0}
@@ -35,7 +29,8 @@ def postprocess(i):
3529

3630
env = i['env']
3731

38-
env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH']
39-
env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH']
32+
if env.get('MLC_DOWNLOAD_MODE', '') != "dry":
33+
env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH']
34+
env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH']
4035

4136
return {'return': 0}

script/get-ml-model-llama3/meta.yaml

+30
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,38 @@ variations:
6161
enable_if_env:
6262
MLC_TMP_REQUIRE_DOWNLOAD:
6363
- yes
64+
- enable_if_env:
65+
MLC_TMP_REQUIRE_DOWNLOAD:
66+
- 'yes'
67+
env:
68+
MLC_DOWNLOAD_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH
69+
MLC_EXTRACT_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH
70+
MLC_DOWNLOAD_URL: mlc-llama3-1:inference/<<<MLC_ML_MODEL_NAME>>>
71+
extra_cache_tags: waymo,dataset
72+
force_cache: true
73+
names:
74+
- dae
75+
tags: download-and-extract
76+
force_env_keys:
77+
- MLC_OUTDIRNAME
78+
update_tags_from_env_with_prefix:
79+
_url.:
80+
- MLC_DOWNLOAD_URL
6481
env:
6582
MLC_DOWNLOAD_SRC: mlcommons
83+
rclone:
84+
group: download-tool
85+
add_deps_recursive:
86+
dae:
87+
tags: _rclone
88+
default: true
89+
dry-run:
90+
group: run-mode
91+
env:
92+
MLC_DOWNLOAD_MODE: dry
93+
dry-run,rclone:
94+
env:
95+
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
6696
hf:
6797
group: download-src
6898
default_variations:

script/get-ml-model-llama3/run-rclone.sh

-4
This file was deleted.

script/get-preprocessed-dataset-criteo/meta.yaml

+18-2
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,28 @@ variations:
142142
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_PREPROCESSED_PATH
143143
MLC_EXTRACT_TO_FOLDER: criteo-preprocessed
144144
MLC_RCLONE_CONFIG_NAME: mlc-inference
145-
MLC_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
145+
MLC_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
146146
extra_cache_tags: criteo,preprocessed,dataset
147147
force_cache: true
148148
names:
149149
- dae
150-
tags: download-and-extract,_rclone,_url.mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
150+
tags: download-and-extract
151+
update_tags_from_env_with_prefix:
152+
_url.:
153+
- MLC_DOWNLOAD_URL
154+
rclone:
155+
group: download-tool
156+
add_deps_recursive:
157+
dae:
158+
tags: _rclone
159+
default: true
160+
dry-run:
161+
group: run-mode
162+
env:
163+
MLC_DOWNLOAD_MODE: dry
164+
dry-run,rclone:
165+
env:
166+
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
151167
preprocess:
152168
group: src
153169
validation:

0 commit comments

Comments
 (0)