NVIDIA
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 9 additions & 1 deletion b/‎.github/workflows/ci.yml‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎doc/conf.py‎
Lines changed: 43 additions & 1 deletion b/‎doc/conf.py‎
Lines changed: 43 additions & 1 deletion
diff --git a/‎doc/index.md‎
Lines changed: 1 addition & 0 deletions b/‎doc/index.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/workloads/ai_dynamo.rst‎
Lines changed: 27 additions & 0 deletions b/‎doc/workloads/ai_dynamo.rst‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎doc/workloads/bash_cmd.rst‎
Lines changed: 68 additions & 0 deletions b/‎doc/workloads/bash_cmd.rst‎
Lines changed: 68 additions & 0 deletions
diff --git a/‎doc/workloads/chakra_replay.rst‎
Lines changed: 66 additions & 0 deletions b/‎doc/workloads/chakra_replay.rst‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎doc/workloads/index.md‎
Lines changed: 31 additions & 0 deletions b/‎doc/workloads/index.md‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎doc/workloads/nccl.rst‎
Lines changed: 68 additions & 0 deletions b/‎doc/workloads/nccl.rst‎
Lines changed: 68 additions & 0 deletions
@@ -18,7 +18,7 @@ jobs:
         uses: actions/setup-python@v5
 
       - name: Install dependencies
-        run: pip install '.[dev]'
+        run: pip install '.[dev,docs]'
 
       - name: Run ruff linter
         run: ruff check
@@ -42,6 +42,14 @@ jobs:
 
           taplo fmt --check --diff
 
+      - name: Build documentation
+        run: |
+          set -eE
+          set -o pipefail
+
+          cd doc
+          make html
+
   test:
     name: Run pytest
 
 
@@ -3,12 +3,41 @@
 # For the full list of built-in configuration values, see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
 
+import os
+import re
+import sys
+
+# Add the project source to Python path for autodoc
+sys.path.insert(0, os.path.abspath("../src"))
+
+
+# Custom autodoc processing to clean up Pydantic classes
+def autodoc_skip_member(app, what, name, obj, skip, options):
+    """Skip members named ``model_*`` or starting with ``_`` (except ``__init__``); otherwise return ``skip`` unchanged."""
+    exclude_patterns = {re.compile(r"model_.*")}  # match() anchors at the start, so this is a name-prefix test
+
+    if any(pattern.match(name) for pattern in exclude_patterns):
+        return True
+
+    # Skip every underscore-prefixed name, not only methods; __init__ is the single exception
+    if name.startswith("_") and name != "__init__":
+        return True
+
+    return skip  # no rule matched: pass through the incoming skip value
+
+
+def setup(app):
+    app.connect("autodoc-skip-member", autodoc_skip_member)  # subscribe the member filter to the "autodoc-skip-member" event
+
+
 # -- Project information -----------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
 
 project = "CloudAI"
 copyright = "2025, NVIDIA CORPORATION & AFFILIATES"
 author = "NVIDIA CORPORATION & AFFILIATES"
+version = "1.4.0-beta"
+release = "1.4.0-beta"
 
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -17,12 +46,25 @@
     "sphinx.ext.autodoc",
     "sphinx.ext.viewcode",
     "sphinx.ext.napoleon",
+    "sphinx.ext.autosummary",
     "myst_parser",
     "sphinxcontrib.mermaid",
+    "sphinx_copybutton",
 ]
 
 exclude_patterns = ["_build"]
 
+# -- Autodoc configuration ---------------------------------------------------
+autodoc_default_options = {
+    "members": True,
+    "member-order": "bysource",
+    "special-members": "__init__",
+    "undoc-members": False,  # Don't show undocumented members
+}
+
+# Generate autosummary even if no references
+autosummary_generate = True
+
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
 
@@ -35,7 +77,7 @@
     "html_image",
 ]
 
-# Configure MyST to handle mermaid code blocks properly
+# Configure MyST to handle code blocks as directives
 myst_fence_as_directive = ["mermaid"]
 
 # Mermaid configuration
 
@@ -172,4 +172,5 @@ For more detailed instructions and guidance, including advanced usage and troubl
 DEV
 ai_dynamo
 reporting
+workloads/index
 ```
@@ -0,0 +1,27 @@
+AI Dynamo
+=========
+
+This workload (``test_template_name`` is ``AIDynamo``) runs AI inference benchmarks using the Dynamo framework with distributed prefill and decode workers.
+
+
+Usage Example
+-------------
+
+See :doc:`../ai_dynamo` for details.
+
+API Documentation
+-----------------
+
+Command Arguments
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.ai_dynamo.ai_dynamo.AIDynamoCmdArgs
+   :members:
+   :show-inheritance:
+
+Test Definition
+~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.ai_dynamo.ai_dynamo.AIDynamoTestDefinition
+   :members:
+   :show-inheritance:
@@ -0,0 +1,68 @@
+Bash Command
+============
+
+This workload (``test_template_name`` is ``BashCmd``) allows you to execute arbitrary bash commands within the CloudAI framework. This is useful for simple scripts, custom testing commands, or integrating external tools.
+
+The ``cmd`` value specified in the ``cmd_args`` section is added as-is to the generated sbatch script.
+
+Usage Example
+-------------
+
+Test TOML example:
+
+.. code-block:: toml
+
+   name = "my_bash_test"
+   description = "Example bash command test"
+   test_template_name = "BashCmd"
+
+   [cmd_args]
+   cmd = "echo 'Hello from CloudAI!'"
+
+Test Scenario example:
+
+.. code-block:: toml
+
+   name = "bash-test"
+
+   [[Tests]]
+   id = "bash.1"
+   num_nodes = 1
+   time_limit = "00:05:00"
+
+   test_name = "my_bash_test"
+
+Test-in-Scenario example:
+
+.. code-block:: toml
+
+   name = "bash-test"
+
+   [[Tests]]
+   id = "bash.1"
+   num_nodes = 1
+   time_limit = "00:05:00"
+
+   name = "my_bash_test"
+   description = "Example bash command test"
+   test_template_name = "BashCmd"
+
+     [Tests.cmd_args]
+     cmd = "echo 'Hello from CloudAI!'"
+
+API Documentation
+-----------------
+
+Command Arguments
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.bash_cmd.bash_cmd.BashCmdArgs
+   :members:
+   :show-inheritance:
+
+Test Definition
+~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.bash_cmd.bash_cmd.BashCmdTestDefinition
+   :members:
+   :show-inheritance:
@@ -0,0 +1,66 @@
+Chakra Replay
+=============
+
+This workload (``test_template_name`` is ``ChakraReplay``) replays execution traces from the Chakra execution trace format for performance analysis and debugging.
+
+Usage Example
+-------------
+
+Test TOML example:
+
+.. code-block:: toml
+
+   name = "my_chakra_test"
+   description = "Example Chakra replay test"
+   test_template_name = "ChakraReplay"
+
+   [cmd_args]
+   trace_path = "/path/to/trace.et"
+
+Test Scenario example:
+
+.. code-block:: toml
+
+   name = "chakra-replay-test"
+
+   [[Tests]]
+   id = "chakra.1"
+   num_nodes = 1
+   time_limit = "00:10:00"
+
+   test_name = "my_chakra_test"
+
+Test-in-Scenario example:
+
+.. code-block:: toml
+
+   name = "chakra-replay-test"
+
+   [[Tests]]
+   id = "chakra.1"
+   num_nodes = 1
+   time_limit = "00:10:00"
+
+   name = "my_chakra_test"
+   description = "Example Chakra replay test"
+   test_template_name = "ChakraReplay"
+
+     [Tests.cmd_args]
+     trace_path = "/path/to/trace.et"
+
+API Documentation
+-----------------
+
+Command Arguments
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.chakra_replay.chakra_replay.ChakraReplayCmdArgs
+   :members:
+   :show-inheritance:
+
+Test Definition
+~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.chakra_replay.chakra_replay.ChakraReplayTestDefinition
+   :members:
+   :show-inheritance:
@@ -0,0 +1,31 @@
+# Workloads Documentation
+
+This section contains automatically generated documentation for all CloudAI workloads. Each workload provides specific functionality for running different types of tests and benchmarks.
+
+## Available Workloads
+
+```{toctree}
+:maxdepth: 1
+:caption: Workloads:
+
+ai_dynamo
+bash_cmd
+chakra_replay
+nccl
+nemo_run
+nixl_bench
+nixl_kvbench
+nixl_perftest
+sleep
+slurm_container
+```
+
+## Adding New Workloads
+
+To add documentation for a new workload:
+
+1. **Add docstrings** to your Python classes and methods
+1. **Create a documentation file** in `doc/workloads/` (e.g., `my_workload.rst` or `my_workload.md`)
+1. **Add it to the toctree** in this index file
+
+The documentation will be automatically generated during the build process!
@@ -0,0 +1,68 @@
+NCCL
+====
+
+This workload (``test_template_name`` is ``NcclTest``) allows you to execute NCCL benchmarks within the CloudAI framework.
+
+Usage Example
+-------------
+
+Test TOML example:
+
+.. code-block:: toml
+
+   name = "my_nccl_test"
+   description = "Example NCCL test"
+   test_template_name = "NcclTest"
+
+   [cmd_args]
+   docker_image_url = "nvcr.io#nvidia/pytorch:25.06-py3"
+
+Test Scenario example:
+
+.. code-block:: toml
+
+   name = "nccl-test"
+
+   [[Tests]]
+   id = "nccl.1"
+   num_nodes = 1
+   time_limit = "00:05:00"
+
+   test_name = "my_nccl_test"
+
+Test-in-Scenario example:
+
+.. code-block:: toml
+
+   name = "nccl-test"
+
+   [[Tests]]
+   id = "nccl.1"
+   num_nodes = 1
+   time_limit = "00:05:00"
+
+   name = "my_nccl_test"
+   description = "Example NCCL test"
+   test_template_name = "NcclTest"
+
+     [Tests.cmd_args]
+     docker_image_url = "nvcr.io#nvidia/pytorch:25.06-py3"
+     subtest_name = "all_reduce_perf_mpi"
+     iters = 100
+
+API Documentation
+-----------------
+
+Command Arguments
+~~~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.nccl_test.nccl.NCCLCmdArgs
+   :members:
+   :show-inheritance:
+
+Test Definition
+~~~~~~~~~~~~~~~
+
+.. autoclass:: cloudai.workloads.nccl_test.nccl.NCCLTestDefinition
+   :members:
+   :show-inheritance: