Skip to content

Add dry run + generalise rclone download #339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions script/download-and-extract/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ def preprocess(i):
def postprocess(i):

env = i['env']

if env.get('MLC_DOWNLOAD_MODE') == "dry":
return {'return': 0}

filepath = env.get('MLC_EXTRACT_EXTRACTED_PATH', '')
if filepath == '':
filepath = env.get('MLC_DOWNLOAD_DOWNLOADED_PATH', '')
Expand Down
3 changes: 3 additions & 0 deletions script/download-and-extract/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ posthook_deps:
MLC_DAE_EXTRACT_DOWNLOADED:
- 'yes'
- 'True'
skip_if_env:
MLC_DOWNLOAD_MODE:
- 'dry'
names:
- extract-script
tags: extract,file
Expand Down
3 changes: 3 additions & 0 deletions script/download-file/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,9 @@ def postprocess(i):

env = i['env']

if env.get('MLC_DOWNLOAD_MODE') == "dry":
return {'return': 0}

filepath = env['MLC_DOWNLOAD_DOWNLOADED_PATH']

if not os.path.exists(filepath):
Expand Down
2 changes: 1 addition & 1 deletion script/download-file/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ if [[ ${require_download} == 1 ]]; then
fi

# Verify checksum again if necessary
if [[ ${MLC_DOWNLOAD_TOOL} == "mlcutil" || ${require_download} == 1 ]]; then
if [[ "${MLC_DOWNLOAD_MODE}" != "dry" && ( "${MLC_DOWNLOAD_TOOL}" == "mlcutil" || ${require_download} == 1 ) ]]; then
if [[ -n "${MLC_DOWNLOAD_CHECKSUM_CMD}" ]]; then
echo -e "\nVerifying checksum after download: ${MLC_DOWNLOAD_CHECKSUM_CMD}"
eval "${MLC_DOWNLOAD_CHECKSUM_CMD}" || exit $?
Expand Down
9 changes: 1 addition & 8 deletions script/get-dataset-waymo-calibration/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,9 @@ def preprocess(i):
if env.get('MLC_DATASET_WAYMO_CALIBRATION_PATH', '') != '':
if not os.path.exists(env['MLC_DATASET_WAYMO_CALIBRATION_PATH']):
return {
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exists!"}
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_CALIBRATION_PATH']} does not exist!"}
else:
env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
i['run_script_input']['script_name'] = 'run-rclone'
if env.get('MLC_OUTDIRNAME', '') != '':
env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = env['MLC_OUTDIRNAME']
else:
env['MLC_DATASET_WAYMO_CALIBRATION_PATH'] = os.path.join(
os.getcwd(), "kitti_format", "calibration")

return {'return': 0}

Expand Down
30 changes: 30 additions & 0 deletions script/get-dataset-waymo-calibration/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,33 @@ variations:
- true
force_cache: true
tags: get,rclone-config,_waymo
- enable_if_env:
MLC_TMP_REQUIRE_DOWNLOAD:
- 'yes'
env:
MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_CALIBRATION_PATH
MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format/testing
extra_cache_tags: waymo,dataset
force_cache: true
names:
- dae
tags: download-and-extract
force_env_keys:
- MLC_OUTDIRNAME
update_tags_from_env_with_prefix:
_url.:
- MLC_DOWNLOAD_URL
rclone:
group: download-tool
add_deps_recursive:
dae:
tags: _rclone
default: true
dry-run:
group: run-mode
env:
MLC_DOWNLOAD_MODE: dry
dry-run,rclone:
env:
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
4 changes: 0 additions & 4 deletions script/get-dataset-waymo-calibration/run-rclone.sh

This file was deleted.

5 changes: 5 additions & 0 deletions script/get-dataset-waymo-calibration/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Unpack the downloaded Waymo calibration archives in place.
#
# Environment read:
#   MLC_DOWNLOAD_MODE                  - "dry" means nothing was downloaded,
#                                        so there is nothing to extract.
#   MLC_TMP_REQUIRE_DOWNLOAD           - customize.py's preprocess sets this to
#                                        "yes" when the dataset must be fetched;
#                                        "true" is still accepted for callers
#                                        that export the older value.
#   MLC_DATASET_WAYMO_CALIBRATION_PATH - download directory; the archives are
#                                        expected under its testing/ subfolder.
if [[ "${MLC_DOWNLOAD_MODE}" != "dry" && ( "${MLC_TMP_REQUIRE_DOWNLOAD}" == "yes" || "${MLC_TMP_REQUIRE_DOWNLOAD}" == "true" ) ]]; then
  cd "${MLC_DATASET_WAYMO_CALIBRATION_PATH}/testing" || exit
  for f in *.tar.gz; do
    # An unmatched glob stays literal ("*.tar.gz"); skip it instead of
    # handing tar a file name that does not exist.
    [[ -e "$f" ]] || continue
    tar -xzvf "$f"
  done
  cd - || exit
fi
7 changes: 0 additions & 7 deletions script/get-dataset-waymo/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,6 @@ def preprocess(i):
'return': 1, 'error': f"Path {env['MLC_DATASET_WAYMO_PATH']} does not exists!"}
else:
env['MLC_TMP_REQUIRE_DOWNLOAD'] = "yes"
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
i['run_script_input']['script_name'] = 'run-rclone'
if env.get('MLC_OUTDIRNAME', '') != '':
env['MLC_DATASET_WAYMO_PATH'] = env['MLC_OUTDIRNAME']
else:
env['MLC_DATASET_WAYMO_PATH'] = os.path.join(
os.getcwd(), "kitti_format")

return {'return': 0}

Expand Down
30 changes: 30 additions & 0 deletions script/get-dataset-waymo/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,35 @@ variations:
enable_if_env:
MLC_TMP_REQUIRE_DOWNLOAD:
- yes
- enable_if_env:
MLC_TMP_REQUIRE_DOWNLOAD:
- 'yes'
env:
MLC_DOWNLOAD_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_WAYMO_PATH
MLC_DOWNLOAD_URL: mlc-waymo:waymo_preprocessed_dataset/kitti_format
extra_cache_tags: waymo,dataset
force_cache: true
names:
- dae
tags: download-and-extract
force_env_keys:
- MLC_OUTDIRNAME
update_tags_from_env_with_prefix:
_url.:
- MLC_DOWNLOAD_URL
env:
MLC_DOWNLOAD_SRC: mlcommons
rclone:
group: download-tool
add_deps_recursive:
dae:
tags: _rclone
default: true
dry-run:
group: run-mode
env:
MLC_DOWNLOAD_MODE: dry
dry-run,rclone:
env:
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
7 changes: 0 additions & 7 deletions script/get-dataset-waymo/run-rclone.sh

This file was deleted.

6 changes: 6 additions & 0 deletions script/get-dataset-waymo/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@
#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out

#${MLC_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency

# Unpack the downloaded Waymo training archives in place.
# Skipped in dry-run mode (MLC_DOWNLOAD_MODE=dry) because nothing was fetched.
# customize.py's preprocess sets MLC_TMP_REQUIRE_DOWNLOAD to "yes" when the
# dataset must be downloaded; "true" is still accepted for callers that export
# the older value.
if [[ "${MLC_DOWNLOAD_MODE}" != "dry" && ( "${MLC_TMP_REQUIRE_DOWNLOAD}" == "yes" || "${MLC_TMP_REQUIRE_DOWNLOAD}" == "true" ) ]]; then
  cd "${MLC_DATASET_WAYMO_PATH}/kitti_format/training" || exit
  for f in *.tar.gz; do
    # An unmatched glob stays literal ("*.tar.gz"); skip it instead of
    # handing tar a file name that does not exist.
    [[ -e "$f" ]] || continue
    tar -xzvf "$f"
  done
  cd - || exit
fi
13 changes: 4 additions & 9 deletions script/get-ml-model-llama3/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,11 @@ def preprocess(i):

path = env.get('MLC_OUTDIRNAME', '').strip()

if path != "":
if path != "" and env.get('MLC_DOWNLOAD_SRC', '') == "huggingface":
os.makedirs(path, exist_ok=True)
env['MLC_GIT_CHECKOUT_FOLDER'] = os.path.join(
path, env['MLC_ML_MODEL_NAME'])

if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
i['run_script_input']['script_name'] = 'run-rclone'
if env.get('MLC_OUTDIRNAME', '') != '':
env['LLAMA3_CHECKPOINT_PATH'] = env['MLC_OUTDIRNAME']
else:
env['LLAMA3_CHECKPOINT_PATH'] = os.getcwd()
env['MLC_TMP_REQUIRE_DOWNLOAD'] = 'yes'

return {'return': 0}
Expand All @@ -35,7 +29,8 @@ def postprocess(i):

env = i['env']

env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH']
env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH']
if env.get('MLC_DOWNLOAD_MODE', '') != "dry":
env['MLC_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH']
env['MLC_GET_DEPENDENT_CACHED_PATH'] = env['MLC_ML_MODEL_PATH']

return {'return': 0}
30 changes: 30 additions & 0 deletions script/get-ml-model-llama3/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,38 @@ variations:
enable_if_env:
MLC_TMP_REQUIRE_DOWNLOAD:
- yes
# Download the Llama 3 checkpoint through the generic download-and-extract
# script; only enabled when preprocess flagged that a download is required.
- enable_if_env:
    MLC_TMP_REQUIRE_DOWNLOAD:
      - 'yes'
  env:
    MLC_DOWNLOAD_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH
    MLC_EXTRACT_FINAL_ENV_NAME: LLAMA3_CHECKPOINT_PATH
    MLC_DOWNLOAD_URL: mlc-llama3-1:inference/<<<MLC_ML_MODEL_NAME>>>
  # Tag the cache entry as a Llama 3 model; the previous "waymo,dataset" value
  # was copied from the Waymo dataset scripts and mislabelled this download.
  extra_cache_tags: llama3,model
  force_cache: true
  names:
    - dae
  tags: download-and-extract
  force_env_keys:
    - MLC_OUTDIRNAME
  update_tags_from_env_with_prefix:
    _url.:
      - MLC_DOWNLOAD_URL
env:
MLC_DOWNLOAD_SRC: mlcommons
rclone:
group: download-tool
add_deps_recursive:
dae:
tags: _rclone
default: true
dry-run:
group: run-mode
env:
MLC_DOWNLOAD_MODE: dry
dry-run,rclone:
env:
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
hf:
group: download-src
default_variations:
Expand Down
4 changes: 0 additions & 4 deletions script/get-ml-model-llama3/run-rclone.sh

This file was deleted.

20 changes: 18 additions & 2 deletions script/get-preprocessed-dataset-criteo/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,28 @@ variations:
MLC_EXTRACT_FINAL_ENV_NAME: MLC_DATASET_PREPROCESSED_PATH
MLC_EXTRACT_TO_FOLDER: criteo-preprocessed
MLC_RCLONE_CONFIG_NAME: mlc-inference
MLC_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
MLC_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
extra_cache_tags: criteo,preprocessed,dataset
force_cache: true
names:
- dae
tags: download-and-extract,_rclone,_url.mlc-inference:mlcommons-inference-wg-public/dlrm_preprocessed
tags: download-and-extract
update_tags_from_env_with_prefix:
_url.:
- MLC_DOWNLOAD_URL
rclone:
group: download-tool
add_deps_recursive:
dae:
tags: _rclone
default: true
dry-run:
group: run-mode
env:
MLC_DOWNLOAD_MODE: dry
dry-run,rclone:
env:
MLC_DOWNLOAD_EXTRA_OPTIONS: --dry-run
preprocess:
group: src
validation:
Expand Down
Loading