Skip to content

Commit 77b471d

Browse files
committed
add vllm arm64
Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>
1 parent 0391f78 commit 77b471d

18 files changed

+98
-49
lines changed

docs/get_started/using_dlcs.md

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@ This guide covers how to run AWS Deep Learning Containers on AWS Platforms such
66

77
### Using SageMaker Python SDK
88

9-
#### Deploy a vLLM inference endpoint:
9+
#### Deploy an SGLang inference endpoint:
1010

1111
```python
1212
from sagemaker.model import Model
1313

1414
model = Model(
15-
image_uri="{{ images.latest_vllm_sagemaker }}",
15+
image_uri="{{ images.latest_sglang_sagemaker }}",
1616
role="arn:aws:iam::<account_id>:role/<role_name>",
1717
env={
18-
"SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
18+
"SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
1919
"HF_TOKEN": "<your_hf_token>",
2020
},
2121
)
@@ -26,16 +26,16 @@ predictor = model.deploy(
2626
)
2727
```
2828

29-
#### Deploy an SGLang inference endpoint:
29+
#### Deploy a vLLM inference endpoint:
3030

3131
```python
3232
from sagemaker.model import Model
3333

3434
model = Model(
35-
image_uri="{{ images.latest_sglang_sagemaker }}",
35+
image_uri="{{ images.latest_vllm_sagemaker }}",
3636
role="arn:aws:iam::<account_id>:role/<role_name>",
3737
env={
38-
"SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
38+
"SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
3939
"HF_TOKEN": "<your_hf_token>",
4040
},
4141
)
@@ -48,77 +48,77 @@ predictor = model.deploy(
4848

4949
### Using Boto3
5050

51-
#### Deploy a vLLM inference endpoint:
51+
#### Deploy an SGLang inference endpoint:
5252

5353
```python
5454
import boto3
5555

5656
sagemaker = boto3.client("sagemaker")
5757

5858
sagemaker.create_model(
59-
ModelName="vllm-model",
59+
ModelName="sglang-model",
6060
PrimaryContainer={
61-
"Image": "{{ images.latest_vllm_sagemaker }}",
61+
"Image": "{{ images.latest_sglang_sagemaker }}",
6262
"Environment": {
63-
"SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
63+
"SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
6464
"HF_TOKEN": "<your_hf_token>",
6565
},
6666
},
6767
ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
6868
)
6969

7070
sagemaker.create_endpoint_config(
71-
EndpointConfigName="vllm-endpoint-config",
71+
EndpointConfigName="sglang-endpoint-config",
7272
ProductionVariants=[
7373
{
7474
"VariantName": "default",
75-
"ModelName": "vllm-model",
75+
"ModelName": "sglang-model",
7676
"InstanceType": "ml.g5.2xlarge",
7777
"InitialInstanceCount": 1,
7878
}
7979
],
8080
)
8181

8282
sagemaker.create_endpoint(
83-
EndpointName="vllm-endpoint",
84-
EndpointConfigName="vllm-endpoint-config",
83+
EndpointName="sglang-endpoint",
84+
EndpointConfigName="sglang-endpoint-config",
8585
)
8686
```
8787

88-
#### Deploy an SGLang inference endpoint:
88+
#### Deploy a vLLM inference endpoint:
8989

9090
```python
9191
import boto3
9292

9393
sagemaker = boto3.client("sagemaker")
9494

9595
sagemaker.create_model(
96-
ModelName="sglang-model",
96+
ModelName="vllm-model",
9797
PrimaryContainer={
98-
"Image": "{{ images.latest_sglang_sagemaker }}",
98+
"Image": "{{ images.latest_vllm_sagemaker }}",
9999
"Environment": {
100-
"SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
100+
"SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
101101
"HF_TOKEN": "<your_hf_token>",
102102
},
103103
},
104104
ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
105105
)
106106

107107
sagemaker.create_endpoint_config(
108-
EndpointConfigName="sglang-endpoint-config",
108+
EndpointConfigName="vllm-endpoint-config",
109109
ProductionVariants=[
110110
{
111111
"VariantName": "default",
112-
"ModelName": "sglang-model",
112+
"ModelName": "vllm-model",
113113
"InstanceType": "ml.g5.2xlarge",
114114
"InitialInstanceCount": 1,
115115
}
116116
],
117117
)
118118

119119
sagemaker.create_endpoint(
120-
EndpointName="sglang-endpoint",
121-
EndpointConfigName="sglang-endpoint-config",
120+
EndpointName="vllm-endpoint",
121+
EndpointConfigName="vllm-endpoint-config",
122122
)
123123
```
124124

docs/mkdocs/stylesheets/extra.css

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848

4949
/* Theme colors */
5050
[data-md-color-scheme="default"] {
51-
--md-primary-fg-color: var(--aws-stone);
51+
--md-primary-fg-color: var(--aws-white);
5252
--md-accent-fg-color: var(--aws-smile);
5353
}
5454

docs/reference/available_images.template.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# Available Images
22

3+
This page lists our supported Deep Learning Containers. Refer to the tables below for all images that are available in ECR repositories. We also publish {{ public_ecr_image_list }} to the [ECR Public Gallery](https://gallery.ecr.aws/deep-learning-containers).
4+
35
## Region Availability
46

57
| Region | Code | General | Neuron | Example URL |

docs/src/constants.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
# language governing permissions and limitations under the License.
1313
"""Global variables for documentation generation."""
1414

15-
TABLE_HEADER = "##"
15+
AVAILABLE_IMAGES_TABLE_HEADER = "##"

docs/src/data/images.yml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,25 @@
55
# Run `python docs/src/generate.py` to regenerate documentation from this file.
66
# =============================================================================
77

8+
# -----------------------------------------------------------------------------
9+
# Public ECR Repositories
10+
# -----------------------------------------------------------------------------
11+
# List of repositories published to ECR Public Gallery
12+
# -----------------------------------------------------------------------------
13+
14+
public_ecr_repositories:
15+
- base
16+
- vllm
17+
- vllm-arm64
18+
- sglang
19+
- pytorch-training
20+
- pytorch-inference
21+
- tensorflow-training
22+
- tensorflow-inference
23+
- pytorch-inference-arm64
24+
- tensorflow-inference-arm64
25+
- pytorch-training-arm64
26+
827
# -----------------------------------------------------------------------------
928
# Framework Support Policy
1029
# -----------------------------------------------------------------------------
@@ -167,6 +186,9 @@ images:
167186
# ===========================================================================
168187
# ARM64/Graviton Containers
169188
# ===========================================================================
189+
vllm-arm64:
190+
- "0.10.2-gpu-py312-cu129-ubuntu22.04-ec2"
191+
170192
pytorch-training-arm64:
171193
- "2.7.0-gpu-py312-cu128-ubuntu22.04-ec2"
172194

docs/src/macros.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def get_latest_image(repo: str, platform: str) -> str:
3434

3535
def define_env(env):
3636
"""Define variables for mkdocs-macros-plugin."""
37+
data = load_yaml(DATA_FILE)
38+
public_repos = data.get("public_ecr_repositories", [])
39+
env.variables["public_ecr_image_list"] = ", ".join(public_repos)
3740
env.variables["images"] = {
3841
"latest_pytorch_training_ec2": get_latest_image("pytorch-training", "-ec2"),
3942
"latest_vllm_sagemaker": get_latest_image("vllm", "-sagemaker"),

docs/src/tables/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717

1818
IMAGE_TABLE_GENERATORS = [
1919
base_table,
20-
vllm_table,
2120
sglang_table,
21+
vllm_table,
2222
pytorch_table,
2323
tensorflow_table,
2424
huggingface_pytorch_table,

docs/src/tables/autogluon_table.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import re
1616

17-
from constants import TABLE_HEADER
17+
from constants import AVAILABLE_IMAGES_TABLE_HEADER
1818
from utils import build_ecr_url, render_table
1919

2020
REPO_KEYS = ["autogluon-training", "autogluon-inference"]
@@ -70,6 +70,8 @@ def generate(yaml_data: dict) -> str:
7070
)
7171

7272
display_name = DISPLAY_NAMES.get(repo_key, repo_key)
73-
sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
73+
sections.append(
74+
f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
75+
)
7476

7577
return "\n\n".join(sections)

docs/src/tables/base_table.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import re
1616

17-
from constants import TABLE_HEADER
17+
from constants import AVAILABLE_IMAGES_TABLE_HEADER
1818
from utils import build_ecr_url, render_table
1919

2020
REPO_KEYS = ["base"]
@@ -72,6 +72,8 @@ def generate(yaml_data: dict) -> str:
7272
)
7373

7474
display_name = DISPLAY_NAMES.get(repo_key, repo_key)
75-
sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
75+
sections.append(
76+
f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
77+
)
7678

7779
return "\n\n".join(sections)

docs/src/tables/djl_table.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
import re
1616

17-
from constants import TABLE_HEADER
17+
from constants import AVAILABLE_IMAGES_TABLE_HEADER
1818
from utils import build_ecr_url, render_table
1919

2020
REPO_KEYS = ["djl-inference"]
@@ -85,6 +85,8 @@ def generate(yaml_data: dict) -> str:
8585
)
8686

8787
display_name = DISPLAY_NAMES.get(repo_key, repo_key)
88-
sections.append(f"{TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows))
88+
sections.append(
89+
f"{AVAILABLE_IMAGES_TABLE_HEADER} {display_name}\n" + render_table(COLUMNS, rows)
90+
)
8991

9092
return "\n\n".join(sections)

0 commit comments

Comments
 (0)