File tree Expand file tree Collapse file tree 3 files changed +8
-15
lines changed Expand file tree Collapse file tree 3 files changed +8
-15
lines changed Original file line number Diff line number Diff line change 38
38
uses: ./.github/workflows/docker-build-action.yaml
39
39
with:
40
40
image: inference-pytorch-inf2
41
- dockerfile: dockerfiles/pytorch/Dockerfile
41
+ dockerfile: dockerfiles/pytorch/Dockerfile.inf2
42
42
secrets:
43
43
TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
44
44
REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
Original file line number Diff line number Diff line change @@ -180,7 +180,7 @@ make inference-pytorch-inf2
180
180
2. Run the container and either provide environment variables for the Hub model you want to use or mount a volume into the container where your model is stored.
181
181
182
182
```bash
183
- docker run -ti -p 5000:5000 -e HF_MODEL_ID="distilbert/distilbert-base-uncased-finetuned-sst-2-english" -e HF_TASK="text-classification" -e HF_OPTIMUM_BATCH_SIZE=1 -e HF_OPTIMUM_SEQUENCE_LENGTH=128 integration-test-pytorch:inf2
183
+ docker run -ti -p 5000:5000 -e HF_MODEL_ID="distilbert/distilbert-base-uncased-finetuned-sst-2-english" -e HF_TASK="text-classification" -e HF_OPTIMUM_BATCH_SIZE=1 -e HF_OPTIMUM_SEQUENCE_LENGTH=128 --device=/dev/neuron0 integration-test-pytorch:inf2
184
184
```
185
185
186
186
3. Send a request
@@ -190,7 +190,7 @@ curl --request POST \
190
190
--url http://localhost:5000 \
191
191
--header 'Content-Type: application/json' \
192
192
--data '{
193
- "inputs": "Wow, this is such a great product. I love it!"
193
+ "inputs": "Wow, this is such a great product. I love it!",
194
194
"parameters": { "top_k": 2 }
195
195
}'
196
196
```
Original file line number Diff line number Diff line change @@ -99,25 +99,18 @@ RUN conda install -c conda-forge \
99
99
&& conda clean -ya \
100
100
&& pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
101
101
&& ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
102
- && pip install packaging
103
-
104
- # Install Neuronx-cc and PyTorch
105
- RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com \
106
- neuronx-cc==$NEURONX_CC_VERSION \
107
- torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
108
- neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
109
- transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION \
110
- && pip install "protobuf>=3.18.3,<4" \
111
- && pip install --no-deps --no-cache-dir -U torchvision==0.16.*
102
+ && pip install --no-cache-dir "protobuf>=3.18.3,<4" setuptools==69.5.1 packaging
112
103
113
104
WORKDIR /
114
105
115
106
# install Hugging Face libraries and its dependencies
116
- RUN pip install --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION}
107
+ RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION} \
108
+ && pip install --no-deps --no-cache-dir -U torchvision==0.16.*
109
+
117
110
118
111
COPY . .
119
112
# install wheel and setuptools
120
- RUN pip install --no-cache-dir -U pip .
113
+ RUN pip install --no-cache-dir -U pip "."
121
114
122
115
# copy application
123
116
COPY src/huggingface_inference_toolkit huggingface_inference_toolkit
You can’t perform that action at this time.
0 commit comments