@@ -6,16 +6,16 @@ This guide covers how to run AWS Deep Learning Containers on AWS Platforms such
 
 ### Using SageMaker Python SDK
 
-#### Deploy a vLLM inference endpoint:
+#### Deploy an SGLang inference endpoint:
 
 ``` python
 from sagemaker.model import Model
 
 model = Model(
-    image_uri="{{ images.latest_vllm_sagemaker }}",
+    image_uri="{{ images.latest_sglang_sagemaker }}",
     role="arn:aws:iam::<account_id>:role/<role_name>",
     env={
-        "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
+        "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
         "HF_TOKEN": "<your_hf_token>",
     },
 )
@@ -26,16 +26,16 @@ predictor = model.deploy(
 )
 ```
 
-#### Deploy an SGLang inference endpoint:
+#### Deploy a vLLM inference endpoint:
 
 ``` python
 from sagemaker.model import Model
 
 model = Model(
-    image_uri="{{ images.latest_sglang_sagemaker }}",
+    image_uri="{{ images.latest_vllm_sagemaker }}",
     role="arn:aws:iam::<account_id>:role/<role_name>",
     env={
-        "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
+        "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
         "HF_TOKEN": "<your_hf_token>",
     },
 )
@@ -48,77 +48,77 @@ predictor = model.deploy(
 
 ### Using Boto3
 
-#### Deploy a vLLM inference endpoint:
+#### Deploy an SGLang inference endpoint:
 
 ``` python
 import boto3
 
 sagemaker = boto3.client("sagemaker")
 
 sagemaker.create_model(
-    ModelName="vllm-model",
+    ModelName="sglang-model",
     PrimaryContainer={
-        "Image": "{{ images.latest_vllm_sagemaker }}",
+        "Image": "{{ images.latest_sglang_sagemaker }}",
         "Environment": {
-            "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
+            "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
             "HF_TOKEN": "<your_hf_token>",
         },
     },
     ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
 )
 
 sagemaker.create_endpoint_config(
-    EndpointConfigName="vllm-endpoint-config",
+    EndpointConfigName="sglang-endpoint-config",
     ProductionVariants=[
         {
             "VariantName": "default",
-            "ModelName": "vllm-model",
+            "ModelName": "sglang-model",
             "InstanceType": "ml.g5.2xlarge",
             "InitialInstanceCount": 1,
         }
     ],
 )
 
 sagemaker.create_endpoint(
-    EndpointName="vllm-endpoint",
-    EndpointConfigName="vllm-endpoint-config",
+    EndpointName="sglang-endpoint",
+    EndpointConfigName="sglang-endpoint-config",
 )
 ```
 
-#### Deploy an SGLang inference endpoint:
+#### Deploy a vLLM inference endpoint:
 
 ``` python
 import boto3
 
 sagemaker = boto3.client("sagemaker")
 
 sagemaker.create_model(
-    ModelName="sglang-model",
+    ModelName="vllm-model",
     PrimaryContainer={
-        "Image": "{{ images.latest_sglang_sagemaker }}",
+        "Image": "{{ images.latest_vllm_sagemaker }}",
         "Environment": {
-            "SM_SGLANG_MODEL_PATH": "meta-llama/Llama-3.1-8B-Instruct",
+            "SM_VLLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct",
             "HF_TOKEN": "<your_hf_token>",
         },
     },
     ExecutionRoleArn="arn:aws:iam::<account_id>:role/<role_name>",
 )
 
 sagemaker.create_endpoint_config(
-    EndpointConfigName="sglang-endpoint-config",
+    EndpointConfigName="vllm-endpoint-config",
     ProductionVariants=[
         {
             "VariantName": "default",
-            "ModelName": "sglang-model",
+            "ModelName": "vllm-model",
             "InstanceType": "ml.g5.2xlarge",
             "InitialInstanceCount": 1,
         }
     ],
 )
 
 sagemaker.create_endpoint(
-    EndpointName="sglang-endpoint",
-    EndpointConfigName="sglang-endpoint-config",
+    EndpointName="vllm-endpoint",
+    EndpointConfigName="vllm-endpoint-config",
 )
 ```
 
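Once a deployment from the SageMaker Python SDK section is in service, the endpoint can be queried from the same SDK. The sketch below is illustrative only: it attaches a `Predictor` to an endpoint by name (the name shown is a placeholder) and sends an assumed OpenAI-style chat payload, which should be checked against the request schema of the serving container.

``` python
from sagemaker.deserializers import JSONDeserializer
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer

# Attach to an already-deployed endpoint; the endpoint name is a placeholder.
predictor = Predictor(
    endpoint_name="sglang-endpoint",
    serializer=JSONSerializer(),      # send the dict below as JSON
    deserializer=JSONDeserializer(),  # parse the JSON response into a dict
)

# Assumed OpenAI-style chat payload; verify the schema expected by the container.
response = predictor.predict(
    {
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 128,
    }
)
print(response)
```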
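Endpoints created with Boto3 can be invoked directly through the `sagemaker-runtime` client. This is again a sketch under the same assumptions: the endpoint name matches the vLLM example above, and the body uses an assumed OpenAI-style chat format.

``` python
import json

import boto3

runtime = boto3.client("sagemaker-runtime")

# "vllm-endpoint" matches the endpoint name used in the Boto3 example above.
response = runtime.invoke_endpoint(
    EndpointName="vllm-endpoint",
    ContentType="application/json",
    # Assumed OpenAI-style chat payload; confirm the schema for your container.
    Body=json.dumps(
        {
            "messages": [{"role": "user", "content": "Hello!"}],
            "max_tokens": 128,
        }
    ),
)

print(response["Body"].read().decode("utf-8"))
```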