Add python sitecustomize file (aws#4486)

arjkesh · sirutBuasai · commit b5a46d657210 · 2025-02-07T16:22:52.000-08:00
diff --git a/miscellaneous_scripts/dlc_template.py b/miscellaneous_scripts/dlc_template.py
@@ -0,0 +1,14 @@
+import os
+
+# Best-effort DLC usage reporting at interpreter startup; OPT_OUT_TRACKING=true (any case) skips it.
+try:
+    if os.path.exists("/usr/local/bin/deep_learning_container.py") and (
+        os.getenv("OPT_OUT_TRACKING", "").lower() != "true"
+    ):
+        import threading
+
+        # os.system() uses sh, so redirect with the POSIX form instead of the bash-only "&>".
+        cmd = "python /usr/local/bin/deep_learning_container.py --framework {FRAMEWORK} --framework-version {FRAMEWORK_VERSION} --container-type {CONTAINER_TYPE} >/dev/null 2>&1"
+        threading.Thread(target=lambda: os.system(cmd), daemon=True).start()  # never blocks exit
+except Exception:  # reporting must never break interpreter startup
+    pass
diff --git a/pytorch/inference/docker/2.4/py3/Dockerfile.cpu b/pytorch/inference/docker/2.4/py3/Dockerfile.cpu
@@ -189,6 +189,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l
 
 COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
 
+COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py
+
 RUN chmod +x /usr/local/bin/deep_learning_container.py
 
 RUN HOME_DIR=/root \
diff --git a/pytorch/inference/docker/2.4/py3/cu124/Dockerfile.gpu b/pytorch/inference/docker/2.4/py3/cu124/Dockerfile.gpu
@@ -242,6 +242,8 @@ RUN curl -o /license.txt https://aws-dlc-licenses.s3.amazonaws.com/pytorch-2.4/l
 
 COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
 
+COPY sitecustomize.py /usr/local/lib/${PYTHON_SHORT_VERSION}/sitecustomize.py
+
 RUN chmod +x /usr/local/bin/deep_learning_container.py
 
 RUN HOME_DIR=/root \
diff --git a/src/image_builder.py b/src/image_builder.py
@@ -240,6 +240,39 @@ def image_builder(buildspec, image_types=[], device_types=[]):
                 }
             }
         )
+        # job_type will be either inference or training, based on the repo URI
+        if "training" in image_repo_uri:
+            label_job_type = "training"
+        elif "inference" in image_repo_uri:
+            label_job_type = "inference"
+        else:
+            raise RuntimeError(
+                f"Cannot find inference or training job type in {image_repo_uri}. "
+                f"This is required to set job_type label."
+            )
+
+        template_file = os.path.join(
+            os.sep, get_cloned_folder_path(), "miscellaneous_scripts", "dlc_template.py"
+        )
+
+        template_fw_version = (
+            str(image_config["framework_version"])
+            if image_config.get("framework_version")
+            else str(BUILDSPEC["version"])
+        )
+        template_fw = str(BUILDSPEC["framework"])
+        post_template_file = utils.generate_dlc_cmd(
+            template_path=template_file,
+            output_path=os.path.join(image_config["root"], "out.py"),
+            framework=template_fw,
+            framework_version=template_fw_version,
+            container_type=label_job_type,
+        )
+
+        ARTIFACTS.update(
+            {"customize": {"source": post_template_file, "target": "sitecustomize.py"}}
+        )
+
         context = Context(ARTIFACTS, f"build/{image_name}.tar.gz", image_config["root"])
 
         if "labels" in image_config:
@@ -265,17 +298,6 @@ def image_builder(buildspec, image_types=[], device_types=[]):
         label_contributor = str(BUILDSPEC.get("contributor"))
         label_transformers_version = str(transformers_version).replace(".", "-")
 
-        # job_type will be either inference or training, based on the repo URI
-        if "training" in image_repo_uri:
-            label_job_type = "training"
-        elif "inference" in image_repo_uri:
-            label_job_type = "inference"
-        else:
-            raise RuntimeError(
-                f"Cannot find inference or training job type in {image_repo_uri}. "
-                f"This is required to set job_type label."
-            )
-
         if cx_type == "sagemaker":
             # Adding standard labels to all images
             labels[
diff --git a/src/utils.py b/src/utils.py
@@ -642,3 +642,23 @@ def verify_if_child_image_is_built_on_top_of_base_image(base_image_uri, child_im
         if base_layer_sha != child_layer_sha:
             return False
     return True
+
+
+def generate_dlc_cmd(template_path, output_path, framework, framework_version, container_type):
+    """Render template_path into output_path and return the output file's base name.
+
+    The "{FRAMEWORK}", "{FRAMEWORK_VERSION}" and "{CONTAINER_TYPE}" anchors are replaced, in that
+    order, with the matching arguments; str.replace requires each of them to be a string.
+    """
+    substitutions = (
+        ("{FRAMEWORK}", framework),
+        ("{FRAMEWORK_VERSION}", framework_version),
+        ("{CONTAINER_TYPE}", container_type),
+    )
+    with open(template_path, "r") as template:
+        rendered = template.read()
+    for placeholder, replacement in substitutions:
+        rendered = rendered.replace(placeholder, replacement)
+    with open(output_path, "w") as rendered_file:
+        rendered_file.write(rendered)
+    return os.path.basename(output_path)
diff --git a/test/dlc_tests/container_tests/bin/pytorch_tests/test_pt_dlc_telemetry_test.py b/test/dlc_tests/container_tests/bin/pytorch_tests/test_pt_dlc_telemetry_test.py
@@ -10,13 +10,13 @@ def _clean_up_reports():
         os.system("rm /tmp/test_tag_request.txt")
 
 
-def opt_in_opt_out_test():
+def opt_in_opt_out_test(exec_cmd):
     os.environ["TEST_MODE"] = "1"
 
     for opt_out_value in ["True", "TRUE", "true"]:
         _clean_up_reports()
         os.environ["OPT_OUT_TRACKING"] = opt_out_value
-        cmd = "python -c 'import torch'"
+        cmd = f"python -c '{exec_cmd}'"
         os.system(cmd)
         time.sleep(5)
         assert not os.path.exists(
@@ -29,7 +29,7 @@ def opt_in_opt_out_test():
     for opt_out_value in ["False", "XYgg"]:
         _clean_up_reports()
         os.environ["OPT_OUT_TRACKING"] = opt_out_value
-        cmd = "python -c 'import torch'"
+        cmd = f"python -c '{exec_cmd}'"
         os.system(cmd)
         time.sleep(5)
         assert os.path.exists(
@@ -43,23 +43,23 @@ def opt_in_opt_out_test():
     print("Opt-In/Opt-Out Test passed")
 
 
-def perf_test():
+def perf_test(exec_cmd):
     os.environ["TEST_MODE"] = "0"
     os.environ["OPT_OUT_TRACKING"] = "False"
     NUM_ITERATIONS = 5
 
     for itr in range(NUM_ITERATIONS):
         total_time_in = 0
         for x in range(NUM_ITERATIONS):
-            cmd = "python -c 'import torch'"
+            cmd = f"python -c '{exec_cmd}'"
             start = time.time()
             os.system(cmd)
             total_time_in += time.time() - start
         print("avg out time: ", total_time_in / NUM_ITERATIONS)
 
         total_time_out = 0
         for x in range(NUM_ITERATIONS):
-            cmd = "export OPT_OUT_TRACKING='true' && python -c 'import torch'"
+            cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
             start = time.time()
             os.system(cmd)
             total_time_out += time.time() - start
@@ -72,7 +72,11 @@ def perf_test():
         print("DLC Telemetry performance test Passed")
 
 
-perf_test()
-opt_in_opt_out_test()
+perf_test("import torch")
+opt_in_opt_out_test("import torch")
+
+# The "import os" tests stay disabled until sitecustomize.py is added to all new images
+# perf_test("import os")
+# opt_in_opt_out_test("import os")
 
 print("All DLC telemetry test passed")
diff --git a/test/dlc_tests/container_tests/bin/test_tf_dlc_telemetry_test.py b/test/dlc_tests/container_tests/bin/test_tf_dlc_telemetry_test.py
@@ -10,13 +10,13 @@ def _clean_up_reports():
         os.system("rm /tmp/test_tag_request.txt")
 
 
-def opt_in_opt_out_test():
+def opt_in_opt_out_test(exec_cmd):
     os.environ["TEST_MODE"] = "1"
 
     for opt_out_value in ["True", "TRUE", "true"]:
         _clean_up_reports()
         os.environ["OPT_OUT_TRACKING"] = opt_out_value
-        cmd = "python -c 'import tensorflow'"
+        cmd = f"python -c '{exec_cmd}'"
         os.system(cmd)
         time.sleep(5)
         assert not os.path.exists(
@@ -29,7 +29,7 @@ def opt_in_opt_out_test():
     for opt_out_value in ["False", "XYgg"]:
         _clean_up_reports()
         os.environ["OPT_OUT_TRACKING"] = opt_out_value
-        cmd = "python -c 'import tensorflow'"
+        cmd = f"python -c '{exec_cmd}'"
         os.system(cmd)
         time.sleep(5)
         assert os.path.exists(
@@ -43,23 +43,23 @@ def opt_in_opt_out_test():
     print("Opt-In/Opt-Out Test passed")
 
 
-def performance_test():
+def performance_test(exec_cmd):
     os.environ["TEST_MODE"] = "0"
     os.environ["OPT_OUT_TRACKING"] = "False"
     NUM_ITERATIONS = 5
 
     for itr in range(NUM_ITERATIONS):
         total_time_in = 0
         for x in range(NUM_ITERATIONS):
-            cmd = "python -c 'import tensorflow'"
+            cmd = f"python -c '{exec_cmd}'"
             start = time.time()
             os.system(cmd)
             total_time_in += time.time() - start
         print("avg out time: ", total_time_in / NUM_ITERATIONS)
 
         total_time_out = 0
         for x in range(NUM_ITERATIONS):
-            cmd = "export OPT_OUT_TRACKING='true' && python -c 'import tensorflow'"
+            cmd = f"export OPT_OUT_TRACKING='true' && python -c '{exec_cmd}'"
             start = time.time()
             os.system(cmd)
             total_time_out += time.time() - start
@@ -72,7 +72,12 @@ def performance_test():
         print("DLC Telemetry performance test Passed")
 
 
-performance_test()
-opt_in_opt_out_test()
+# test framework functionality
+performance_test("import tensorflow")
+opt_in_opt_out_test("import tensorflow")
+
+# The "import os" tests stay disabled until sitecustomize.py is added to all new images
+# performance_test("import os")
+# opt_in_opt_out_test("import os")
 
 print("All DLC telemetry test passed")