Skip to content

Commit b5a46d6

Browse files
arjkesh authored and sirutBuasai committed
Add python sitecustomize file (aws#4486)
1 parent 56261a2 commit b5a46d6

File tree

7 files changed

+96
-27
lines changed

7 files changed

+96
-27
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
# Interpreter start-up hook: when the DLC telemetry script is installed and the
# user has not opted out, run it from a daemon thread with its output discarded.
# NOTE(review): the brace-delimited framework / framework-version / container-type
# tokens in the command look like placeholders filled in when the image is
# built -- confirm against the build tooling before renaming them.
import os

try:
    # Telemetry is skipped only when OPT_OUT_TRACKING equals "true"
    # (case-insensitive); an unset variable or any other value leaves it enabled.
    if (
        os.path.exists("/usr/local/bin/deep_learning_container.py")
        and os.getenv("OPT_OUT_TRACKING", "").lower() != "true"
    ):
        # Imported only on the reporting path.
        import threading

        # Use POSIX redirection: os.system() runs the command through /bin/sh,
        # and shells such as dash parse the bash-only "&>" form as "run in the
        # background" followed by an empty "> /dev/null", which does not
        # silence the script's output.
        cmd = "python /usr/local/bin/deep_learning_container.py --framework {FRAMEWORK} --framework-version {FRAMEWORK_VERSION} --container-type {CONTAINER_TYPE} >/dev/null 2>&1"
        # daemon=True replaces the deprecated Thread.setDaemon(True) call.
        reporter = threading.Thread(target=lambda: os.system(cmd), daemon=True)
        reporter.start()
except Exception:
    # Swallow every failure so this hook can never break interpreter start-up.
    pass

pytorch/inference/docker/2.4/py3/Dockerfile.cpu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l
189189

190190
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
191191

192+
COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py
193+
192194
RUN chmod +x /usr/local/bin/deep_learning_container.py
193195

194196
RUN HOME_DIR=/root \

pytorch/inference/docker/2.4/py3/cu124/Dockerfile.gpu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l
242242

243243
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
244244

245+
COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py
246+
245247
RUN chmod +x /usr/local/bin/deep_learning_container.py
246248

247249
RUN HOME_DIR=/root \

src/image_builder.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,39 @@ def image_builder(buildspec, image_types=[], device_types=[]):
240240
}
241241
}
242242
)
243+
# job_type will be either inference or training, based on the repo URI
244+
if "training" in image_repo_uri:
245+
label_job_type = "training"
246+
elif "inference" in image_repo_uri:
247+
label_job_type = "inference"
248+
else:
249+
raise RuntimeError(
250+
f"Cannot find inference or training job type in {image_repo_uri}. "
251+
f"This is required to set job_type label."
252+
)
253+
254+
template_file = os.path.join(
255+
os.sep, get_cloned_folder_path(), "miscellaneous_scripts", "dlc_template.py"
256+
)
257+
258+
template_fw_version = (
259+
str(image_config["framework_version"])
260+
if image_config.get("framework_version")
261+
else str(BUILDSPEC["version"])
262+
)
263+
template_fw = str(BUILDSPEC["framework"])
264+
post_template_file = utils.generate_dlc_cmd(
265+
template_path=template_file,
266+
output_path=os.path.join(image_config["root"], "out.py"),
267+
framework=template_fw,
268+
framework_version=template_fw_version,
269+
container_type=label_job_type,
270+
)
271+
272+
ARTIFACTS.update(
273+
{"customize": {"source": post_template_file, "target": "sitecustomize.py"}}
274+
)
275+
243276
context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"])
244277

245278
if "labels" in image_config:
@@ -265,17 +298,6 @@ def image_builder(buildspec, image_types=[], device_types=[]):
265298
label_contributor = str(BUILDSPEC.get("contributor"))
266299
label_transformers_version = str(transformers_version).replace(".", "-")
267300

268-
# job_type will be either inference or training, based on the repo URI
269-
if "training" in image_repo_uri:
270-
label_job_type = "training"
271-
elif "inference" in image_repo_uri:
272-
label_job_type = "inference"
273-
else:
274-
raise RuntimeError(
275-
f"Cannot find inference or training job type in {image_repo_uri}. "
276-
f"This is required to set job_type label."
277-
)
278-
279301
if cx_type == "sagemaker":
280302
# Adding standard labels to all images
281303
labels[

src/utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,3 +642,23 @@ def verify_if_child_image_is_built_on_top_of_base_image(base_image_uri, child_im
642642
if base_layer_sha != child_layer_sha:
643643
return False
644644
return True
645+
646+
def generate_dlc_cmd(template_path, output_path, framework, framework_version, container_type):
    """
    Render a template file by filling in its brace-delimited anchors.

    Every occurrence of the FRAMEWORK, FRAMEWORK_VERSION and CONTAINER_TYPE
    anchors (each written in the template wrapped in curly braces) is replaced
    by the corresponding argument, and the result is written to output_path.
    Any other text, including other brace-delimited tokens, is left untouched.

    :param template_path: path of the template file to read
    :param output_path: path of the rendered file to write (overwritten if present)
    :param framework: value substituted for the FRAMEWORK anchor
    :param framework_version: value substituted for the FRAMEWORK_VERSION anchor
    :param container_type: value substituted for the CONTAINER_TYPE anchor
    :return: str, base name of output_path
    """
    # Local import so the function does not depend on the module's import block.
    import re

    # Coerce to str so numeric versions (e.g. a float parsed from YAML) are
    # accepted instead of raising TypeError during substitution.
    replacements = {
        "FRAMEWORK": str(framework),
        "FRAMEWORK_VERSION": str(framework_version),
        "CONTAINER_TYPE": str(container_type),
    }

    # One alternation over all anchors lets the template be rewritten in a single
    # pass, so text inserted for one anchor is never re-scanned for another.
    anchor_pattern = re.compile(
        "|".join(re.escape(f"{{{anchor}}}") for anchor in replacements)
    )

    with open(template_path, "r", encoding="utf-8") as tf:
        content = tf.read()

    # match.group(0) is the anchor including its braces; strip them for the lookup.
    content = anchor_pattern.sub(lambda match: replacements[match.group(0)[1:-1]], content)

    with open(output_path, "w", encoding="utf-8") as out_f:
        out_f.write(content)

    # Return only the base name of the rendered file.
    return os.path.basename(output_path)

test/dlc_tests/container_tests/bin/pytorch_tests/test_pt_dlc_telemetry_test.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ def _clean_up_reports():
1010
os.system("rm /tmp/test_tag_request.txt")
1111

1212

13-
def opt_in_opt_out_test():
13+
def opt_in_opt_out_test(exec_cmd):
1414
os.environ["TEST_MODE"] = "1"
1515

1616
for opt_out_value in ["True", "TRUE", "true"]:
1717
_clean_up_reports()
1818
os.environ["OPT_OUT_TRACKING"] = opt_out_value
19-
cmd = "python -c 'import torch'"
19+
cmd = f"python -c '{exec_cmd}'"
2020
os.system(cmd)
2121
time.sleep(5)
2222
assert not os.path.exists(
@@ -29,7 +29,7 @@ def opt_in_opt_out_test():
2929
for opt_out_value in ["False", "XYgg"]:
3030
_clean_up_reports()
3131
os.environ["OPT_OUT_TRACKING"] = opt_out_value
32-
cmd = "python -c 'import torch'"
32+
cmd = f"python -c '{exec_cmd}'"
3333
os.system(cmd)
3434
time.sleep(5)
3535
assert os.path.exists(
@@ -43,23 +43,23 @@ def opt_in_opt_out_test():
4343
print("Opt-In/Opt-Out Test passed")
4444

4545

46-
def perf_test():
46+
def perf_test(exec_cmd):
4747
os.environ["TEST_MODE"] = "0"
4848
os.environ["OPT_OUT_TRACKING"] = "False"
4949
NUM_ITERATIONS = 5
5050

5151
for itr in range(NUM_ITERATIONS):
5252
total_time_in = 0
5353
for x in range(NUM_ITERATIONS):
54-
cmd = "python -c 'import torch'"
54+
cmd = f"python -c '{exec_cmd}'"
5555
start = time.time()
5656
os.system(cmd)
5757
total_time_in += time.time() - start
5858
print("avg out time: ", total_time_in / NUM_ITERATIONS)
5959

6060
total_time_out = 0
6161
for x in range(NUM_ITERATIONS):
62-
cmd = "export OPT_OUT_TRACKING='true' && python -c 'import torch'"
62+
cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
6363
start = time.time()
6464
os.system(cmd)
6565
total_time_out += time.time() - start
@@ -72,7 +72,11 @@ def perf_test():
7272
print("DLC Telemetry performance test Passed")
7373

7474

75-
perf_test()
76-
opt_in_opt_out_test()
75+
perf_test("import torch")
76+
opt_in_opt_out_test("import torch")
77+
78+
# The "import os" tests stay disabled until sitecustomize.py is added to all new images
79+
# perf_test("import os")
80+
# opt_in_opt_out_test("import os")
7781

7882
print("All DLC telemetry test passed")

test/dlc_tests/container_tests/bin/test_tf_dlc_telemetry_test.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,13 @@ def _clean_up_reports():
1010
os.system("rm /tmp/test_tag_request.txt")
1111

1212

13-
def opt_in_opt_out_test():
13+
def opt_in_opt_out_test(exec_cmd):
1414
os.environ["TEST_MODE"] = "1"
1515

1616
for opt_out_value in ["True", "TRUE", "true"]:
1717
_clean_up_reports()
1818
os.environ["OPT_OUT_TRACKING"] = opt_out_value
19-
cmd = "python -c 'import tensorflow'"
19+
cmd = f"python -c '{exec_cmd}'"
2020
os.system(cmd)
2121
time.sleep(5)
2222
assert not os.path.exists(
@@ -29,7 +29,7 @@ def opt_in_opt_out_test():
2929
for opt_out_value in ["False", "XYgg"]:
3030
_clean_up_reports()
3131
os.environ["OPT_OUT_TRACKING"] = opt_out_value
32-
cmd = "python -c 'import tensorflow'"
32+
cmd = f"python -c '{exec_cmd}'"
3333
os.system(cmd)
3434
time.sleep(5)
3535
assert os.path.exists(
@@ -43,23 +43,23 @@ def opt_in_opt_out_test():
4343
print("Opt-In/Opt-Out Test passed")
4444

4545

46-
def performance_test():
46+
def performance_test(exec_cmd):
4747
os.environ["TEST_MODE"] = "0"
4848
os.environ["OPT_OUT_TRACKING"] = "False"
4949
NUM_ITERATIONS = 5
5050

5151
for itr in range(NUM_ITERATIONS):
5252
total_time_in = 0
5353
for x in range(NUM_ITERATIONS):
54-
cmd = "python -c 'import tensorflow'"
54+
cmd = f"python -c '{exec_cmd}'"
5555
start = time.time()
5656
os.system(cmd)
5757
total_time_in += time.time() - start
5858
print("avg out time: ", total_time_in / NUM_ITERATIONS)
5959

6060
total_time_out = 0
6161
for x in range(NUM_ITERATIONS):
62-
cmd = "export OPT_OUT_TRACKING='true' && python -c 'import tensorflow'"
62+
cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
6363
start = time.time()
6464
os.system(cmd)
6565
total_time_out += time.time() - start
@@ -72,7 +72,12 @@ def performance_test():
7272
print("DLC Telemetry performance test Passed")
7373

7474

75-
performance_test()
76-
opt_in_opt_out_test()
75+
# test framework functionality
76+
performance_test("import tensorflow")
77+
opt_in_opt_out_test("import tensorflow")
78+
79+
# The "import os" tests stay disabled until sitecustomize.py is added to all new images
80+
# performance_test("import os")
81+
# opt_in_opt_out_test("import os")
7782

7883
print("All DLC telemetry test passed")

0 commit comments

Comments
 (0)