Skip to content

Commit f00bf04

Browse files
fix: update inference processor from 'inf2' to 'neuronx' (#5488)
* fix: update inference processor from 'inf2' to 'neuronx'

  The current version generates the following image tag for 0.4.1:
  0.10.2-inf2-py310-sdk2.26.0-ubuntu22.04
  But it should be:
  0.10.2-neuronx-py310-sdk2.26.0-ubuntu22.04

* chore: add HF vllm neuron 0.4.2 image

* chore: add HF vllm neuron 0.4.4 image

  Note that the actual SDK version is 2.26.1, but the images were published with a 2.26.0 tag.

* fix: 0.4.4 SDK version is actually 2.26.1

---------

Co-authored-by: Molly He <mollyhe@amazon.com>
1 parent 11de4b0 commit f00bf04

File tree

1 file changed

+56
-4
lines changed

1 file changed

+56
-4
lines changed

sagemaker-core/src/sagemaker/core/image_uri_config/huggingface-vllm-neuronx.json

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
{
22
"inference": {
33
"processors": [
4-
"inf2"
4+
"neuronx"
55
],
66
"version_aliases": {
7-
"0.4": "0.4.1"
7+
"0.4": "0.4.4"
88
},
99
"versions": {
1010
"0.4.1": {
@@ -27,12 +27,64 @@
2727
"tag_prefix": "0.10.2",
2828
"repository": "huggingface-vllm-inference-neuronx",
2929
"container_version": {
30-
"inf2": "ubuntu22.04"
30+
"neuronx": "ubuntu22.04"
3131
},
3232
"sdk_versions": [
3333
"sdk2.26.0"
3434
]
35+
},
36+
"0.4.2": {
37+
"py_versions": [
38+
"py310"
39+
],
40+
"registries": {
41+
"ap-northeast-1": "763104351884",
42+
"ap-south-1": "763104351884",
43+
"ap-southeast-1": "763104351884",
44+
"ap-southeast-2": "763104351884",
45+
"eu-central-1": "763104351884",
46+
"eu-west-1": "763104351884",
47+
"eu-west-3": "763104351884",
48+
"sa-east-1": "763104351884",
49+
"us-east-1": "763104351884",
50+
"us-east-2": "763104351884",
51+
"us-west-2": "763104351884"
52+
},
53+
"tag_prefix": "0.11.0-optimum0.4.2",
54+
"repository": "huggingface-vllm-inference-neuronx",
55+
"container_version": {
56+
"neuronx": "ubuntu22.04"
57+
},
58+
"sdk_versions": [
59+
"sdk2.26.0"
60+
]
61+
},
62+
"0.4.4": {
63+
"py_versions": [
64+
"py310"
65+
],
66+
"registries": {
67+
"ap-northeast-1": "763104351884",
68+
"ap-south-1": "763104351884",
69+
"ap-southeast-1": "763104351884",
70+
"ap-southeast-2": "763104351884",
71+
"eu-central-1": "763104351884",
72+
"eu-west-1": "763104351884",
73+
"eu-west-3": "763104351884",
74+
"sa-east-1": "763104351884",
75+
"us-east-1": "763104351884",
76+
"us-east-2": "763104351884",
77+
"us-west-2": "763104351884"
78+
},
79+
"tag_prefix": "0.11.0-optimum0.4.4",
80+
"repository": "huggingface-vllm-inference-neuronx",
81+
"container_version": {
82+
"neuronx": "ubuntu22.04"
83+
},
84+
"sdk_versions": [
85+
"sdk2.26.1"
86+
]
3587
}
3688
}
3789
}
38-
}
90+
}

0 commit comments

Comments
 (0)