add vllm arm64

sirutBuasai · sirutBuasai · commit 77b471d07f6a · 2026-01-13T18:35:29.000-08:00
Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>
diff --git a/docs/get_started/using_dlcs.md b/docs/get_started/using_dlcs.md
@@ -6,16 +6,16 @@ This guide covers how to run AWS Deep Learning Containers on AWS Platforms such
 
 ### Using SageMaker Python SDK
 
-#### Deploy a vLLM inference endpoint:
+#### Deploy an SGLang inference endpoint:
 
 ```python
 from sagemaker.model import Model
 
 model = Model(
-    image_uri="{{ images.latest_vllm_sagemaker }}",
+    image_uri="{{ images.latest_sglang_sagemaker }}",
     role="arn:aws:iam::<account_id>:role/<role_name>",
     env={
-        "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
+        "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
         "HF_TOKEN": "<your_hf_token>",
     },
 )
@@ -26,16 +26,16 @@ predictor = model.deploy(
 )
 ```
 
-#### Deploy an SGLang inference endpoint:
+#### Deploy a vLLM inference endpoint:
 
 ```python
 from sagemaker.model import Model
 
 model = Model(
-    image_uri="{{ images.latest_sglang_sagemaker }}",
+    image_uri="{{ images.latest_vllm_sagemaker }}",
     role="arn:aws:iam::<account_id>:role/<role_name>",
     env={
-        "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
+        "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
         "HF_TOKEN": "<your_hf_token>",
     },
 )
@@ -48,77 +48,77 @@ predictor = model.deploy(
 
 ### Using Boto3
 
-#### Deploy a vLLM inference endpoint:
+#### Deploy an SGLang inference endpoint:
 
 ```python
 import boto3
 
 sagemaker = boto3.client("sagemaker")
 
 sagemaker.create_model(
-    ModelName="vllm-model",
+    ModelName="sglang-model",
     PrimaryContainer={
-        "Image": "{{ images.latest_vllm_sagemaker }}",
+        "Image": "{{ images.latest_sglang_sagemaker }}",
         "Environment": {
-            "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
+            "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
             "HF_TOKEN": "<your_hf_token>",
         },
     },
     ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
 )
 
 sagemaker.create_endpoint_config(
-    EndpointConfigName="vllm-endpoint-config",
+    EndpointConfigName="sglang-endpoint-config",
     ProductionVariants=[
         {
             "VariantName": "default",
-            "ModelName": "vllm-model",
+            "ModelName": "sglang-model",
             "InstanceType": "ml.g5.2xlarge",
             "InitialInstanceCount": 1,
         }
     ],
 )
 
 sagemaker.create_endpoint(
-    EndpointName="vllm-endpoint",
-    EndpointConfigName="vllm-endpoint-config",
+    EndpointName="sglang-endpoint",
+    EndpointConfigName="sglang-endpoint-config",
 )
 ```
 
-#### Deploy an SGLang inference endpoint:
+#### Deploy a vLLM inference endpoint:
 
 ```python
 import boto3
 
 sagemaker = boto3.client("sagemaker")
 
 sagemaker.create_model(
-    ModelName="sglang-model",
+    ModelName="vllm-model",
     PrimaryContainer={
-        "Image": "{{ images.latest_sglang_sagemaker }}",
+        "Image": "{{ images.latest_vllm_sagemaker }}",
         "Environment": {
-            "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
+            "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
             "HF_TOKEN": "<your_hf_token>",
         },
     },
     ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
 )
 
 sagemaker.create_endpoint_config(
-    EndpointConfigName="sglang-endpoint-config",
+    EndpointConfigName="vllm-endpoint-config",
     ProductionVariants=[
         {
             "VariantName": "default",
-            "ModelName": "sglang-model",
+            "ModelName": "vllm-model",
             "InstanceType": "ml.g5.2xlarge",
             "InitialInstanceCount": 1,
         }
     ],
 )
 
 sagemaker.create_endpoint(
-    EndpointName="sglang-endpoint",
-    EndpointConfigName="sglang-endpoint-config",
+    EndpointName="vllm-endpoint",
+    EndpointConfigName="vllm-endpoint-config",
 )
 ```
 
diff --git a/docs/mkdocs/stylesheets/extra.css b/docs/mkdocs/stylesheets/extra.css
@@ -48,7 +48,7 @@
 
 /* Theme colors */
 [data-md-color-scheme="default"] {
-  --md-primary-fg-color: var(--aws-stone);
+  --md-primary-fg-color: var(--aws-white);
   --md-accent-fg-color: var(--aws-smile);
 }
 
diff --git a/docs/reference/available_images.template.md b/docs/reference/available_images.template.md
@@ -1,5 +1,7 @@
 # Available Images
 
+This page lists our supported deep learning containers. Refer to the tables below for all images that are available in ECR repositories. We also publish {{ public_ecr_image_list }} to the [ECR Public Gallery](https://gallery.ecr.aws/deep-learning-containers).
+
 ## Region Availability
 
 | Region                    | Code           | General | Neuron | Example URL                                                                          |
diff --git a/docs/src/constants.py b/docs/src/constants.py
@@ -12,4 +12,4 @@
 # language governing permissions and limitations under the License.
 """Global variables for documentation generation."""
 
-TABLE_HEADER = "##"
+AVAILABLE_IMAGES_TABLE_HEADER = "##"
diff --git a/docs/src/data/images.yml b/docs/src/data/images.yml
@@ -5,6 +5,25 @@
 # Run `python docs/src/generate.py` to regenerate documentation from this file.
 # =============================================================================
 
+# -----------------------------------------------------------------------------
+# Public ECR Repositories
+# -----------------------------------------------------------------------------
+# List of repositories published to ECR Public Gallery
+# -----------------------------------------------------------------------------
+
+public_ecr_repositories:
+  - base
+  - vllm
+  - vllm-arm64
+  - sglang
+  - pytorch-training
+  - pytorch-inference
+  - tensorflow-training
+  - tensorflow-inference
+  - pytorch-inference-arm64
+  - tensorflow-inference-arm64
+  - pytorch-training-arm64
+
 # -----------------------------------------------------------------------------
 # Framework Support Policy
 # -----------------------------------------------------------------------------
@@ -167,6 +186,9 @@ images:
   # ===========================================================================
   # ARM64/Graviton Containers
   # ===========================================================================
+  vllm-arm64:
+    - "0.10.2-gpu-py312-cu129-ubuntu22.04-ec2"
+
   pytorch-training-arm64:
     - "2.7.0-gpu-py312-cu128-ubuntu22.04-ec2"
 
diff --git a/docs/src/macros.py b/docs/src/macros.py
@@ -34,6 +34,9 @@ def get_latest_image(repo: str, platform: str) -> str:
 
 def define_env(env):
     """Define variables for mkdocs-macros-plugin."""
+    data = load_yaml(DATA_FILE)
+    public_repos = data.get("public_ecr_repositories", [])
+    env.variables["public_ecr_image_list"] = ", ".join(public_repos)
     env.variables["images"] = {
         "latest_pytorch_training_ec2": get_latest_image("pytorch-training", "-ec2"),
         "latest_vllm_sagemaker": get_latest_image("vllm", "-sagemaker"),
diff --git a/docs/src/tables/__init__.py b/docs/src/tables/__init__.py
@@ -17,8 +17,8 @@
 
 IMAGE_TABLE_GENERATORS = [
     base_table,
-    vllm_table,
     sglang_table,
+    vllm_table,
     pytorch_table,
     tensorflow_table,
     huggingface_pytorch_table,
diff --git a/docs/src/tables/autogluon_table.py b/docs/src/tables/autogluon_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = ["autogluon-training", "autogluon-inference"]
@@ -70,6 +70,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/base_table.py b/docs/src/tables/base_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = ["base"]
@@ -72,6 +72,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/djl_table.py b/docs/src/tables/djl_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = ["djl-inference"]
@@ -85,6 +85,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/huggingface_pytorch_table.py b/docs/src/tables/huggingface_pytorch_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = [
@@ -127,6 +127,8 @@ def generate(yaml_data: dict) -> str:
                 )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(columns, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(columns, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/huggingface_tensorflow_table.py b/docs/src/tables/huggingface_tensorflow_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = [
@@ -82,6 +82,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/neuron_table.py b/docs/src/tables/neuron_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = [
@@ -81,6 +81,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/pytorch_table.py b/docs/src/tables/pytorch_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = [
@@ -82,6 +82,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/sglang_table.py b/docs/src/tables/sglang_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = ["sglang"]
@@ -72,6 +72,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/tensorflow_table.py b/docs/src/tables/tensorflow_table.py
@@ -14,7 +14,7 @@
 
 import re
 
-from constants import TABLE_HEADER
+from constants import AVAILABLE_IMAGES_TABLE_HEADER
 from utils import build_ecr_url, render_table
 
 REPO_KEYS = [
@@ -78,6 +78,8 @@ def generate(yaml_data: dict) -> str:
             )
 
         display_name = DISPLAY_NAMES.get(repo_key, repo_key)
-        sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
+        sections.append(
+            f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
+        )
 
     return "\n\n".join(sections)
diff --git a/docs/src/tables/triton_table.py b/docs/src/tables/triton_table.py
diff --git a/docs/src/tables/vllm_table.py b/docs/src/tables/vllm_table.py

Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@`
`48`	`48`
`49`	`49`	`/* Theme colors */`
`50`	`50`	`[data-md-color-scheme="default"] {`
`51`		`- --md-primary-fg-color: var(--aws-stone);`
	`51`	`+ --md-primary-fg-color: var(--aws-white);`
`52`	`52`	`--md-accent-fg-color: var(--aws-smile);`
`53`	`53`	`}`
`54`	`54`