kubeflow · SoumyaRaikwar · Mar 3, 2026
diff --git a/kubeflow/trainer/backends/kubernetes/utils.py b/kubeflow/trainer/backends/kubernetes/utils.py
@@ -322,6 +322,8 @@ def get_command_using_train_func(
     train_func_parameters: dict[str, Any] | None,
     pip_index_urls: list[str],
     packages_to_install: list[str] | None,
+    enable_profiler: bool = False,
+    profiler_dir: str = "/artifacts/profile",
 ) -> list[str]:
     """
     Get the Trainer container command from the given training function and parameters.
@@ -358,7 +360,24 @@ def get_command_using_train_func(
         func_call = f"{train_func.__name__}(**{train_func_parameters})"
 
     # Combine everything into the final code string.
-    func_code = f"{func_code}\n{func_call}\n"
+    if enable_profiler:
+        profiler_code = textwrap.dedent(f"""\
+            import torch
+            from torch.profiler import profile, record_function, ProfilerActivity
+
+            with profile(
+                activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+                record_shapes=True,
+                profile_memory=True,
+                with_stack=True,
+                on_trace_ready=torch.profiler.tensorboard_trace_handler("{profiler_dir}")
+            ) as prof:
+                with record_function("model_training"):
+                    {func_call}
+            """)
+        func_code = f"{func_code}\n{profiler_code}\n"
+    else:
+        func_code = f"{func_code}\n{func_call}\n"
 
     is_mpi = runtime.trainer.command[0] == "mpirun"
     # The default file location for OpenMPI is: /home/mpiuser/<FILE_NAME>.py
@@ -420,14 +439,16 @@ def get_trainer_cr_from_custom_trainer(
             trainer.func_args,
             trainer.pip_index_urls,
             trainer.packages_to_install,
+            trainer.enable_profiler,
+            trainer.profiler_dir,
         )
 
     # Set the TrainJob trainer image if that is set.
-    if trainer.image:
+    if getattr(trainer, "image", None):
         trainer_cr.image = trainer.image
 
     # Add environment variables to the Trainer.
-    if trainer.env:
+    if getattr(trainer, "env", None):
         trainer_cr.env = [
             models.IoK8sApiCoreV1EnvVar(name=key, value=value) for key, value in trainer.env.items()
         ]

diff --git a/kubeflow/trainer/backends/kubernetes/utils_test.py b/kubeflow/trainer/backends/kubernetes/utils_test.py
@@ -477,6 +477,39 @@ def test_get_script_for_python_packages(test_case):
                 ),
             ],
         ),
+        TestCase(
+            name="with profiler enabled",
+            expected_status=SUCCESS,
+            config={
+                "func": (lambda: print("Hello World")),
+                "func_args": None,
+                "runtime": _build_runtime(),
+                "enable_profiler": True,
+                "profiler_dir": "/custom/profile/dir",
+            },
+            expected_output=[
+                "bash",
+                "-c",
+                (
+                    "\nread -r -d '' SCRIPT << EOM\n\n"
+                    '"func": (lambda: print("Hello World")),\n\n'
+                    "import torch\n"
+                    "from torch.profiler import profile, record_function, ProfilerActivity\n\n"
+                    "with profile(\n"
+                    "    activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],\n"
+                    "    record_shapes=True,\n"
+                    "    profile_memory=True,\n"
+                    "    with_stack=True,\n"
+                    '    on_trace_ready=torch.profiler.tensorboard_trace_handler("/custom/profile/dir")\n'
+                    ") as prof:\n"
+                    '    with record_function("model_training"):\n'
+                    "        <lambda>()\n\n\n"
+                    "EOM\n"
+                    'printf "%s" "$SCRIPT" > "utils_test.py"\n'
+                    'python "utils_test.py"'
+                ),
+            ],
+        ),
     ],
 )
 def test_get_command_using_train_func(test_case: TestCase):
@@ -487,6 +520,8 @@ def test_get_command_using_train_func(test_case: TestCase):
             train_func_parameters=test_case.config.get("func_args"),
             pip_index_urls=constants.DEFAULT_PIP_INDEX_URLS,
             packages_to_install=test_case.config.get("packages_to_install", []),
+            enable_profiler=test_case.config.get("enable_profiler", False),
+            profiler_dir=test_case.config.get("profiler_dir", "/artifacts/profile"),
         )
 
         assert test_case.expected_status == SUCCESS

diff --git a/kubeflow/trainer/backends/localprocess/utils.py b/kubeflow/trainer/backends/localprocess/utils.py
@@ -184,6 +184,8 @@ def get_command_using_train_func(
     train_func_parameters: dict[str, Any] | None,
     venv_dir: str,
     train_job_name: str,
+    enable_profiler: bool = False,
+    profiler_dir: str = "/artifacts/profile",
 ) -> str:
     """
     Get the Trainer container command from the given training function and parameters.
@@ -214,9 +216,28 @@ def get_command_using_train_func(
     #     print('Start Training...')
     # train({'lr': 0.01})
     if train_func_parameters is None:
-        func_code = f"{func_code}\n{train_func.__name__}()\n"
+        func_call = f"{train_func.__name__}()"
     else:
-        func_code = f"{func_code}\n{train_func.__name__}({train_func_parameters})\n"
+        func_call = f"{train_func.__name__}(**{train_func_parameters})"
+
+    if enable_profiler:
+        profiler_code = textwrap.dedent(f"""\
+            import torch
+            from torch.profiler import profile, record_function, ProfilerActivity
+
+            with profile(
+                activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
+                record_shapes=True,
+                profile_memory=True,
+                with_stack=True,
+                on_trace_ready=torch.profiler.tensorboard_trace_handler("{profiler_dir}")
+            ) as prof:
+                with record_function("model_training"):
+                    {func_call}
+            """)
+        func_code = f"{func_code}\n{profiler_code}\n"
+    else:
+        func_code = f"{func_code}\n{func_call}\n"
 
     with open(func_file, "w") as f:
         f.write(func_code)
@@ -286,6 +307,8 @@ def get_local_train_job_script(
         train_func=trainer.func,
         train_func_parameters=trainer.func_args,
         train_job_name=train_job_name,
+        enable_profiler=trainer.enable_profiler,
+        profiler_dir=trainer.profiler_dir,
     )
 
     cleanup_script = get_cleanup_venv_script(cleanup_venv=cleanup_venv, venv_dir=venv_dir)

diff --git a/kubeflow/trainer/types/types.py b/kubeflow/trainer/types/types.py
@@ -61,6 +61,8 @@ class CustomTrainer:
     num_nodes: int | None = None
     resources_per_node: dict | None = None
     env: dict[str, str] | None = None
+    enable_profiler: bool = False
+    profiler_dir: str = "/artifacts/profile"
 
 
 # Configuration for the Custom Trainer Container.