Skip to content

Commit e70adff

Browse files
authored
Release 0.4.3 version with sentence-transformers fixes (#81)
* Pin `0.4.3dev0` version for upcoming release * Fix `sentence-transformers` unit tests * Move `peft` out of core dependencies `peft` requires `torch` as a mandatory dependency, so adding `peft` as a core dependency means that `torch` will be installed i.e. even when `pip install -e ".[quality]"` which slows things a bit, since `torch` installation takes time; and in some cases is not needed, so moving `peft` into the `torch` extra makes the most sense for the moment (not final). * Fix `trust_remote_code` propagation for `sentence-transformers` * Add missing return type-hint * Fix `test__load_repository_from_gcs` to rely on `create_anonymous_client` * Fix `kwargs` propagation for `sentence-transformers` * Bump version to 0.4.3
1 parent 6777c55 commit e70adff

File tree

4 files changed

+63
-22
lines changed

4 files changed

+63
-22
lines changed

setup.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# We don't declare our dependency on transformers here because we build with
66
# different packages for different variants
77

8-
VERSION = "0.4.2"
8+
VERSION = "0.4.3"
99

1010
# Ubuntu packages
1111
# libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev
@@ -15,7 +15,6 @@
1515
install_requires = [
1616
"transformers[sklearn,sentencepiece,audio,vision,sentencepiece]==4.44.0",
1717
"huggingface_hub[hf_transfer]==0.24.5",
18-
"peft==0.12.0",
1918
# vision
2019
"Pillow",
2120
"librosa",
@@ -34,7 +33,9 @@
3433

3534
extras["st"] = ["sentence_transformers==2.7.0"]
3635
extras["diffusers"] = ["diffusers==0.30.0", "accelerate==0.33.0"]
37-
extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"]
36+
# Includes `peft` as PEFT requires `torch` so having `peft` as a core dependency
37+
# means that `torch` will be installed even if the `torch` extra is not specified.
38+
extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio", "peft==0.12.0"]
3839
extras["test"] = [
3940
"pytest==7.2.1",
4041
"pytest-xdist",

src/huggingface_inference_toolkit/sentence_transformers_utils.py

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,30 @@ def is_sentence_transformers_available():
1212

1313

1414
class SentenceSimilarityPipeline:
15-
def __init__(self, model_dir: str, device: str = None): # needs "cuda" for GPU
16-
self.model = SentenceTransformer(model_dir, device=device)
15+
def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU
16+
self.model = SentenceTransformer(model_dir, device=device, **kwargs)
1717

1818
def __call__(self, inputs=None):
19-
embeddings1 = self.model.encode(inputs["source_sentence"], convert_to_tensor=True)
19+
embeddings1 = self.model.encode(
20+
inputs["source_sentence"], convert_to_tensor=True
21+
)
2022
embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True)
2123
similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
2224
return {"similarities": similarities}
2325

2426

2527
class SentenceEmbeddingPipeline:
26-
def __init__(self, model_dir: str, device: str = None): # needs "cuda" for GPU
27-
self.model = SentenceTransformer(model_dir, device=device)
28+
def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU
29+
self.model = SentenceTransformer(model_dir, device=device, **kwargs)
2830

2931
def __call__(self, inputs):
3032
embeddings = self.model.encode(inputs).tolist()
3133
return {"embeddings": embeddings}
3234

3335

3436
class RankingPipeline:
35-
def __init__(self, model_dir: str, device: str = None): # needs "cuda" for GPU
36-
self.model = CrossEncoder(model_dir, device=device)
37+
def __init__(self, model_dir: str, device: str = None, **kwargs): # needs "cuda" for GPU
38+
self.model = CrossEncoder(model_dir, device=device, **kwargs)
3739

3840
def __call__(self, inputs):
3941
scores = self.model.predict(inputs).tolist()
@@ -47,12 +49,16 @@ def __call__(self, inputs):
4749
}
4850

4951

50-
def get_sentence_transformers_pipeline(
51-
task=None,
52-
model_dir=None,
53-
device=-1,
54-
**kwargs
55-
):
52+
def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
5653
device = "cuda" if device == 0 else "cpu"
57-
pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device, **kwargs)
58-
return pipeline
54+
55+
kwargs.pop("tokenizer", None)
56+
kwargs.pop("framework", None)
57+
58+
if task not in SENTENCE_TRANSFORMERS_TASKS:
59+
raise ValueError(
60+
f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}"
61+
)
62+
return SENTENCE_TRANSFORMERS_TASKS[task](
63+
model_dir=model_dir, device=device, **kwargs
64+
)

src/huggingface_inference_toolkit/vertex_ai_utils.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99

1010
# copied from https://github.com/googleapis/python-aiplatform/blob/94d838d8cfe1599bc2d706e66080c05108821986/google/cloud/aiplatform/utils/prediction_utils.py#L121
11-
def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path] = "/tmp"):
11+
def _load_repository_from_gcs(
12+
artifact_uri: str, target_dir: Union[str, Path] = "/tmp"
13+
) -> str:
1214
"""
1315
Load files from GCS path to target_dir
1416
"""

tests/unit/test_vertex_ai_utils.py

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from pathlib import Path
22

3-
from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs
4-
53

64
def test__load_repository_from_gcs():
7-
"""Tests the `_load_repository_from_gcs` function against a public artifact URI.
5+
"""Tests the `_load_repository_from_gcs` function against a public artifact URI. But the
6+
function is overridden since the client needs to be anonymous temporarily, as we're testing
7+
against a publicly accessible artifact.
88
99
References:
1010
- https://cloud.google.com/storage/docs/public-datasets/era5
@@ -14,6 +14,38 @@ def test__load_repository_from_gcs():
1414
public_artifact_uri = (
1515
"gs://gcp-public-data-arco-era5/raw/date-variable-static/2021/12/31/soil_type"
1616
)
17+
18+
def _load_repository_from_gcs(artifact_uri: str, target_dir: Path) -> str:
19+
"""Temporarily override of the `_load_repository_from_gcs` function."""
20+
import re
21+
22+
from google.cloud import storage
23+
from huggingface_inference_toolkit.vertex_ai_utils import GCS_URI_PREFIX
24+
25+
if isinstance(target_dir, str):
26+
target_dir = Path(target_dir)
27+
28+
if artifact_uri.startswith(GCS_URI_PREFIX):
29+
matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri)
30+
bucket_name, prefix = matches.groups() # type: ignore
31+
32+
gcs_client = storage.Client.create_anonymous_client()
33+
blobs = gcs_client.list_blobs(bucket_name, prefix=prefix)
34+
for blob in blobs:
35+
name_without_prefix = blob.name[len(prefix) :]
36+
name_without_prefix = (
37+
name_without_prefix[1:]
38+
if name_without_prefix.startswith("/")
39+
else name_without_prefix
40+
)
41+
file_split = name_without_prefix.split("/")
42+
directory = target_dir / Path(*file_split[0:-1])
43+
directory.mkdir(parents=True, exist_ok=True)
44+
if name_without_prefix and not name_without_prefix.endswith("/"):
45+
blob.download_to_filename(target_dir / name_without_prefix)
46+
47+
return str(target_dir.absolute())
48+
1749
target_dir = Path.cwd() / "target"
1850
target_dir_path = _load_repository_from_gcs(
1951
artifact_uri=public_artifact_uri, target_dir=target_dir

0 commit comments

Comments
 (0)