From 67ac77730298f13bac333a07ee32f6d2353622ce Mon Sep 17 00:00:00 2001
From: philschmid
Date: Tue, 20 Aug 2024 13:53:49 +0200
Subject: [PATCH 1/4] update

---
 README.md      | 53 +++++++++++++++++++++-----------------------------
 pyproject.toml | 10 +++++-----
 setup.cfg      |  1 -
 setup.py       |  2 +-
 4 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 66ef23a7..96611e4f 100644
--- a/README.md
+++ b/README.md
@@ -1,21 +1,18 @@
-

Hugging Face Inference Toolkit

-Hugging Face Inference Toolkit is for serving 🤗 Transformers models in containers. This library provides default pre-processing, predict and postprocessing for Transformers, Sentence Tranfsformers. It is also possible to define custom `handler.py` for customization. The Toolkit is build to work with the [Hugging Face Hub](https://huggingface.co/models).
-
----
+Hugging Face Inference Toolkit is for serving 🤗 Transformers models in containers. This library provides default pre-processing, predict and postprocessing for Transformers, Sentence Tranfsformers. It is also possible to define custom `handler.py` for customization. The Toolkit is build to work with the [Hugging Face Hub](https://huggingface.co/models) and is used as "default" option in [Inference Endpoints](https://ui.endpoints.huggingface.co/)
 
-## 💻 Getting Started with Hugging Face Inference Toolkit
+## 💻 Getting Started with Hugging Face Inference Toolkit
 
-* Clone the repository `git clone
-* Install the dependencies in dev mode `pip install -e ".[torch,st,diffusers,test,quality]"`
-  * If you develop on AWS inferentia2 install with `pip install -e ".[test,quality]" optimum-neuron[neuronx] --upgrade`
-  * If you develop on Google Cloud install with `pip install -e ".[torch,st,diffusers,google,test,quality]"`
-* Unit Testing: `make unit-test`
-* Integration testing: `make integ-test`
+- Clone the repository `git clone https://github.com/huggingface/huggingface-inference-toolkit`
+- Install the dependencies in dev mode `pip install -e ".[torch,st,diffusers,test,quality]"`
+  - If you develop on AWS inferentia2 install with `pip install -e ".[test,quality]" optimum-neuron[neuronx] --upgrade`
+  - If you develop on Google Cloud install with `pip install -e ".[torch,st,diffusers,google,test,quality]"`
+- Unit Testing: `make unit-test`
+- Integration testing: `make integ-test`
 
 ### Local run
 
@@ -68,18 +65,18 @@ curl --request POST \
 The Hugging Face Inference Toolkit allows user to provide a custom inference through a `handler.py` file which is located in the repository.
 
-For an example check [philschmid/custom-pipeline-text-classification](https://huggingface.co/philschmid/custom-pipeline-text-classification):
+For an example check [philschmid/custom-pipeline-text-classification](https://huggingface.co/philschmid/custom-pipeline-text-classification):
 
 ```bash
 model.tar.gz/
 |- pytorch_model.bin
 |- ....
 |- handler.py
-|- requirements.txt
+|- requirements.txt
 ```
 
 In this example, `pytroch_model.bin` is the model file saved from training, `handler.py` is the custom inference handler, and `requirements.txt` is a requirements file to add additional dependencies.
 
-The custom module can override the following methods:
+The custom module can override the following methods:
 
 ### Vertex AI Support
 
@@ -136,9 +133,9 @@ curl --request POST \
 
 The Hugging Face Inference Toolkit provides support for deploying Hugging Face on AWS Inferentia2. To deploy a model on Inferentia2 you have 3 options:
 
-* Provide `HF_MODEL_ID`, the model repo id on huggingface.co which contains the compiled model under `.neuron` format e.g. `optimum/bge-base-en-v1.5-neuronx`
-* Provide the `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH` environment variables to compile the model on the fly, e.g. `HF_OPTIMUM_BATCH_SIZE=1 HF_OPTIMUM_SEQUENCE_LENGTH=128`
-* Include `neuron` dictionary in the [config.json](https://huggingface.co/optimum/tiny_random_bert_neuron/blob/main/config.json) file in the model archive, e.g. `neuron: {"static_batch_size": 1, "static_sequence_length": 128}`
+- Provide `HF_MODEL_ID`, the model repo id on huggingface.co which contains the compiled model under `.neuron` format e.g. `optimum/bge-base-en-v1.5-neuronx`
+- Provide the `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH` environment variables to compile the model on the fly, e.g. `HF_OPTIMUM_BATCH_SIZE=1 HF_OPTIMUM_SEQUENCE_LENGTH=128`
+- Include `neuron` dictionary in the [config.json](https://huggingface.co/optimum/tiny_random_bert_neuron/blob/main/config.json) file in the model archive, e.g. `neuron: {"static_batch_size": 1, "static_sequence_length": 128}`
 
 The currently supported tasks can be found [here](https://huggingface.co/docs/optimum-neuron/en/package_reference/supported_models). If you plan to deploy an LLM, we recommend taking a look at [Neuronx TGI](https://huggingface.co/blog/text-generation-inference-on-inferentia2), which is purposly build for LLMs.
 
@@ -148,14 +145,14 @@ Start Hugging Face Inference Toolkit with the following environment variables.
 _Note: You need to run this on an Inferentia2 instance._
 
-* transformers `text-classification` with `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH`
+- transformers `text-classification` with `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH`
 
 ```bash
 mkdir tmp2/
 HF_MODEL_ID="distilbert/distilbert-base-uncased-finetuned-sst-2-english" HF_TASK="text-classification" HF_OPTIMUM_BATCH_SIZE=1 HF_OPTIMUM_SEQUENCE_LENGTH=128 HF_MODEL_DIR=tmp2 uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000
 ```
 
-* sentence transformers `feature-extration` with `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH`
+- sentence transformers `feature-extration` with `HF_OPTIMUM_BATCH_SIZE` and `HF_OPTIMUM_SEQUENCE_LENGTH`
 
 ```bash
 HF_MODEL_ID="sentence-transformers/all-MiniLM-L6-v2" HF_TASK="feature-extraction" HF_OPTIMUM_BATCH_SIZE=1 HF_OPTIMUM_SEQUENCE_LENGTH=128 HF_MODEL_DIR=tmp2 uvicorn src.huggingface_inference_toolkit.webservice_starlette:app --port 5000
 ```
 
@@ -284,19 +281,13 @@ HF_OPTIMUM_SEQUENCE_LENGTH="128"
 
 ## ⚙ Supported Front-Ends
 
-* [x] Starlette (HF Endpoints)
-* [x] Starlette (Vertex AI)
-* [ ] Starlette (Azure ML)
-* [ ] Starlette (SageMaker)
-
----
-
-## 🤝 Contributing
-
----
+- [x] Starlette (HF Endpoints)
+- [x] Starlette (Vertex AI)
+- [ ] Starlette (Azure ML)
+- [ ] Starlette (SageMaker)
 
-## 📜 License
+## 📜 License
 
-TBD.
+This project is licensed under the Apache-2.0 License.
 
 ---
diff --git a/pyproject.toml b/pyproject.toml
index a692967f..29746fe6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ no_implicit_optional = true
 scripts_are_modules = true
 
 [tool.ruff]
-lint.select = [
+select = [
    "E", # pycodestyle errors
    "W", # pycodestyle warnings
    "F", # pyflakes
@@ -12,7 +12,7 @@ lint.select = [
    "C", # flake8-comprehensions
    "B", # flake8-bugbear
 ]
-lint.ignore = [
+ignore = [
    "E501", # Line length (handled by ruff-format)
    "B008", # do not perform function calls in argument defaults
    "C901", # too complex
@@ -21,13 +21,13 @@ lint.ignore = [
 line-length = 119
 
 # Allow unused variables when underscore-prefixed.
-lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
 
 # Assume Python 3.11.
 target-version = "py311"
-lint.per-file-ignores = {"__init__.py" = ["F401"]}
+per-file-ignores = { "__init__.py" = ["F401"] }
 
 [tool.isort]
 profile = "black"
-known_third_party = ["transformers", "starlette", "huggingface_hub"]
\ No newline at end of file
+known_third_party = ["transformers", "starlette", "huggingface_hub"]
diff --git a/setup.cfg b/setup.cfg
index 79d6ce9b..fab739da 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,7 +10,6 @@ known_third_party =
     datasets
     tensorflow
     torch
-    robyn
 
 line_length = 119
 lines_after_imports = 2
diff --git a/setup.py b/setup.py
index 83847136..bf361a62 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 # We don't declare our dependency on transformers here because we build with
 # different packages for different variants
-VERSION = "0.4.3"
+VERSION = "0.5.0"
 
 # Ubuntu packages
 # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev

From 4f388076fe47dbde6c4103523e22113e94f37d94 Mon Sep 17 00:00:00 2001
From: philschmid
Date: Tue, 20 Aug 2024 14:00:43 +0200
Subject: [PATCH 2/4] make style happy

---
 tests/integ/conftest.py                  | 2 +-
 tests/integ/helpers.py                   | 2 +-
 tests/integ/test_pytorch_local_inf2.py   | 2 +-
 tests/unit/test_diffusers.py             | 5 +++--
 tests/unit/test_handler.py               | 3 ++-
 tests/unit/test_optimum_utils.py         | 3 ++-
 tests/unit/test_sentence_transformers.py | 3 ++-
 tests/unit/test_serializer.py            | 3 ++-
 tests/unit/test_utils.py                 | 5 +++--
 tests/unit/test_vertex_ai_utils.py       | 1 +
 10 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py
index 4b3f6118..d69a9f97 100644
--- a/tests/integ/conftest.py
+++ b/tests/integ/conftest.py
@@ -7,9 +7,9 @@
 import docker
 import pytest
 import tenacity
-from huggingface_inference_toolkit.utils import _load_repository_from_hf
 from transformers.testing_utils import _run_slow_tests
 
+from huggingface_inference_toolkit.utils import _load_repository_from_hf
 from tests.integ.config import task2model
 
 HF_HUB_CACHE = os.environ.get("HF_HUB_CACHE", "/home/ubuntu/.cache/huggingface/hub")
diff --git a/tests/integ/helpers.py b/tests/integ/helpers.py
index e9e5d808..afabaf71 100644
--- a/tests/integ/helpers.py
+++ b/tests/integ/helpers.py
@@ -8,9 +8,9 @@
 import pytest
 import requests
 from docker import DockerClient
-from huggingface_inference_toolkit.utils import _load_repository_from_hf
 from transformers.testing_utils import _run_slow_tests, require_tf, require_torch
 
+from huggingface_inference_toolkit.utils import _load_repository_from_hf
 from tests.integ.config import task2input, task2model, task2output, task2validation
 
 IS_GPU = _run_slow_tests
diff --git a/tests/integ/test_pytorch_local_inf2.py b/tests/integ/test_pytorch_local_inf2.py
index de0c7b4e..6de351fe 100644
--- a/tests/integ/test_pytorch_local_inf2.py
+++ b/tests/integ/test_pytorch_local_inf2.py
@@ -1,7 +1,7 @@
 import pytest
-from huggingface_inference_toolkit.optimum_utils import is_optimum_neuron_available
 from transformers.testing_utils import require_torch
 
+from huggingface_inference_toolkit.optimum_utils import is_optimum_neuron_available
 from tests.integ.helpers import verify_task
 
 require_inferentia = pytest.mark.skipif(
diff --git a/tests/unit/test_diffusers.py b/tests/unit/test_diffusers.py
index 890575da..b7f4a56d 100644
--- a/tests/unit/test_diffusers.py
+++ b/tests/unit/test_diffusers.py
@@ -1,11 +1,12 @@
 import logging
 import tempfile
 
-from huggingface_inference_toolkit.diffusers_utils import IEAutoPipelineForText2Image
-from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline
 from PIL import Image
 from transformers.testing_utils import require_torch, slow
 
+from huggingface_inference_toolkit.diffusers_utils import IEAutoPipelineForText2Image
+from huggingface_inference_toolkit.utils import _load_repository_from_hf, get_pipeline
+
 logging.basicConfig(level="DEBUG")
 
 @require_torch
diff --git a/tests/unit/test_handler.py b/tests/unit/test_handler.py
index 44a8f818..052a5bfc 100644
--- a/tests/unit/test_handler.py
+++ b/tests/unit/test_handler.py
@@ -1,6 +1,8 @@
 import tempfile
 
 import pytest
+from transformers.testing_utils import require_tf, require_torch
+
 from huggingface_inference_toolkit.handler import (
     HuggingFaceHandler,
     get_inference_handler_either_custom_or_default_handler,
@@ -9,7 +11,6 @@
     _is_gpu_available,
     _load_repository_from_hf,
 )
-from transformers.testing_utils import require_tf, require_torch
 
 TASK = "text-classification"
 MODEL = "hf-internal-testing/tiny-random-distilbert"
diff --git a/tests/unit/test_optimum_utils.py b/tests/unit/test_optimum_utils.py
index 8014decc..075fdf81 100644
--- a/tests/unit/test_optimum_utils.py
+++ b/tests/unit/test_optimum_utils.py
@@ -2,13 +2,14 @@
 import tempfile
 
 import pytest
+from transformers.testing_utils import require_torch
+
 from huggingface_inference_toolkit.optimum_utils import (
     get_input_shapes,
     get_optimum_neuron_pipeline,
     is_optimum_neuron_available,
 )
 from huggingface_inference_toolkit.utils import _load_repository_from_hf
-from transformers.testing_utils import require_torch
 
 require_inferentia = pytest.mark.skipif(
     not is_optimum_neuron_available(),
diff --git a/tests/unit/test_sentence_transformers.py b/tests/unit/test_sentence_transformers.py
index f8556ed0..635f39de 100644
--- a/tests/unit/test_sentence_transformers.py
+++ b/tests/unit/test_sentence_transformers.py
@@ -1,5 +1,7 @@
 import tempfile
 
+from transformers.testing_utils import require_torch
+
 from huggingface_inference_toolkit.sentence_transformers_utils import (
     SentenceEmbeddingPipeline,
     get_sentence_transformers_pipeline,
@@ -8,7 +10,6 @@
     _load_repository_from_hf,
     get_pipeline,
 )
-from transformers.testing_utils import require_torch
 
 
 @require_torch
diff --git a/tests/unit/test_serializer.py b/tests/unit/test_serializer.py
index 0b53995d..8afcb28d 100644
--- a/tests/unit/test_serializer.py
+++ b/tests/unit/test_serializer.py
@@ -2,9 +2,10 @@
 import numpy as np
 import pytest
-from huggingface_inference_toolkit.serialization import Audioer, Imager, Jsoner
 from PIL import Image
 
+from huggingface_inference_toolkit.serialization import Audioer, Imager, Jsoner
+
 
 def test_json_serialization():
     t = {"res": np.array([2.0]), "text": "I like you.", "float": 1.2}
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index e7b3eef6..c0f1fef8 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -3,6 +3,9 @@
 import tempfile
 from pathlib import Path
 
+from transformers.file_utils import is_torch_available
+from transformers.testing_utils import require_tf, require_torch, slow
+
 from huggingface_inference_toolkit.handler import get_inference_handler_either_custom_or_default_handler
 from huggingface_inference_toolkit.utils import (
     _get_framework,
@@ -11,8 +14,6 @@
     _is_gpu_available,
     check_and_register_custom_pipeline_from_directory,
     get_pipeline,
 )
-from transformers.file_utils import is_torch_available
-from transformers.testing_utils import require_tf, require_torch, slow
 
 TASK_MODEL = "sshleifer/tiny-dbmdz-bert-large-cased-finetuned-conll03-english"
diff --git a/tests/unit/test_vertex_ai_utils.py b/tests/unit/test_vertex_ai_utils.py
index 8f31d8b2..316a4088 100644
--- a/tests/unit/test_vertex_ai_utils.py
+++ b/tests/unit/test_vertex_ai_utils.py
@@ -20,6 +20,7 @@ def _load_repository_from_gcs(artifact_uri: str, target_dir: Path) -> str:
     import re
 
     from google.cloud import storage
+
     from huggingface_inference_toolkit.vertex_ai_utils import GCS_URI_PREFIX
 
     if isinstance(target_dir, str):

From e9374320e18fee65cf0f5000a6e9446f9576ebb3 Mon Sep 17 00:00:00 2001
From: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:32:25 +0200
Subject: [PATCH 3/4] Update README.md

Co-authored-by: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 96611e4f..c07aa5b1 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@ Hugging Face Inference Toolkit is for serving 🤗 Transformers models in contai
 
 - Clone the repository `git clone https://github.com/huggingface/huggingface-inference-toolkit`
 - Install the dependencies in dev mode `pip install -e ".[torch,st,diffusers,test,quality]"`
-  - If you develop on AWS inferentia2 install with `pip install -e ".[test,quality]" optimum-neuron[neuronx] --upgrade`
+  - If you develop on AWS Inferentia2 install with `pip install -e ".[inf2,test,quality]" --upgrade`
   - If you develop on Google Cloud install with `pip install -e ".[torch,st,diffusers,google,test,quality]"`
 - Unit Testing: `make unit-test`
 - Integration testing: `make integ-test`

From 33b84b2d930ee02bef05d023813e92698ff56dcb Mon Sep 17 00:00:00 2001
From: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
Date: Tue, 20 Aug 2024 14:32:30 +0200
Subject: [PATCH 4/4] Update README.md

Co-authored-by: Alvaro Bartolome <36760800+alvarobartt@users.noreply.github.com>
---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index c07aa5b1..63cef3f8 100644
--- a/README.md
+++ b/README.md
@@ -290,4 +290,3 @@ HF_OPTIMUM_SEQUENCE_LENGTH="128"
 
 This project is licensed under the Apache-2.0 License.
 
----
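
The README changes in the first patch describe shipping a custom `handler.py` (plus an optional `requirements.txt`) inside the model archive to override the toolkit's default pre-processing, prediction, and post-processing. As a rough, hypothetical sketch of what such a file can look like, a minimal handler might be the following; note that the `EndpointHandler` class name, the `__init__(model_dir)` and `__call__(data)` signatures, and the `text-classification` pipeline are assumptions based on the Inference Endpoints custom-handler convention, not something taken from this patch series:

```python
# handler.py: hypothetical sketch only; class name and method signatures follow
# the Inference Endpoints custom-handler convention and are assumptions here.
from typing import Any, Dict, List

from transformers import pipeline


class EndpointHandler:
    def __init__(self, model_dir: str = "") -> None:
        # Load whatever was shipped inside model.tar.gz (e.g. pytorch_model.bin).
        self.pipeline = pipeline("text-classification", model=model_dir)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        # The toolkit passes the deserialized request body; "inputs" and
        # "parameters" mirror the payload shape used in the README's curl examples.
        inputs = data.get("inputs", data)
        parameters = data.get("parameters", {})
        return self.pipeline(inputs, **parameters)
```

Any extra packages such a handler imports would be pinned in the `requirements.txt` placed next to it in `model.tar.gz`.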