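Fix inf2 docker example: build from Dockerfile.inf2, pass the Neuron device to `docker run`, fix README request JSON, and consolidate dependency installation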

philschmid · philschmid · commit f95d3740ad8a · 2024-07-01T12:54:16.000Z
diff --git a/.github/workflows/build-container.yaml b/.github/workflows/build-container.yaml
@@ -38,7 +38,7 @@ jobs:
     uses: ./.github/workflows/docker-build-action.yaml
     with:
       image: inference-pytorch-inf2
-      dockerfile: dockerfiles/pytorch/Dockerfile
+      dockerfile: dockerfiles/pytorch/Dockerfile.inf2
     secrets:
       TAILSCALE_AUTHKEY: ${{ secrets.TAILSCALE_AUTHKEY }}
       REGISTRY_USERNAME: ${{ secrets.REGISTRY_USERNAME }}
diff --git a/README.md b/README.md
@@ -180,7 +180,7 @@ make inference-pytorch-inf2
 2. Run the container and provide either environment variables to the HUB model you want to use or mount a volume to the container, where your model is stored.
 
 ```bash
-docker run -ti -p 5000:5000 -e HF_MODEL_ID="distilbert/distilbert-base-uncased-finetuned-sst-2-english" -e HF_TASK="text-classification" -e HF_OPTIMUM_BATCH_SIZE=1 -e HF_OPTIMUM_SEQUENCE_LENGTH=128 integration-test-pytorch:inf2
+docker run -ti -p 5000:5000 -e HF_MODEL_ID="distilbert/distilbert-base-uncased-finetuned-sst-2-english" -e HF_TASK="text-classification" -e HF_OPTIMUM_BATCH_SIZE=1 -e HF_OPTIMUM_SEQUENCE_LENGTH=128 --device=/dev/neuron0 integration-test-pytorch:inf2
 ```
 
 3. Send request
@@ -190,7 +190,7 @@ curl --request POST \
 	--url http://localhost:5000 \
 	--header 'Content-Type: application/json' \
 	--data '{
-	"inputs": "Wow, this is such a great product. I love it!"
+	"inputs": "Wow, this is such a great product. I love it!",
 	"parameters": { "top_k": 2 }
 }'
 ```
diff --git a/dockerfiles/pytorch/Dockerfile.inf2 b/dockerfiles/pytorch/Dockerfile.inf2
@@ -99,25 +99,18 @@ RUN conda install -c conda-forge \
  && conda clean -ya \
  && pip install --upgrade pip --trusted-host pypi.org --trusted-host files.pythonhosted.org \
  && ln -s /opt/conda/bin/pip /usr/local/bin/pip3 \
- && pip install packaging
-
-# Install Neuronx-cc and PyTorch
-RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com \
-    neuronx-cc==$NEURONX_CC_VERSION \
-    torch-neuronx==$NEURONX_FRAMEWORK_VERSION \
-    neuronx_distributed==$NEURONX_DISTRIBUTED_VERSION \
-    transformers-neuronx==$NEURONX_TRANSFORMERS_VERSION \
- && pip install "protobuf>=3.18.3,<4" \
- && pip install --no-deps --no-cache-dir -U torchvision==0.16.*
+ && pip install --no-cache-dir "protobuf>=3.18.3,<4" setuptools==69.5.1 packaging
 
 WORKDIR /
 
 # install Hugging Face libraries and its dependencies
-RUN pip install --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION}
+RUN pip install --extra-index-url https://pip.repos.neuron.amazonaws.com --no-cache-dir optimum-neuron[neuronx]==${OPTIMUM_NEURON_VERSION}  \
+ && pip install --no-deps --no-cache-dir -U torchvision==0.16.*
+
 
 COPY . .
 # install wheel and setuptools
-RUN pip install --no-cache-dir -U pip .
+RUN pip install --no-cache-dir -U pip "."
 
 # copy application
 COPY src/huggingface_inference_toolkit huggingface_inference_toolkit