diff --git a/setup.py b/setup.py
index 3458aa57..83847136 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 # We don't declare our dependency on transformers here because we build with
 # different packages for different variants
 
-VERSION = "0.4.2"
+VERSION = "0.4.3"
 
 # Ubuntu packages
 # libsndfile1-dev: torchaudio requires the development version of the libsndfile package which can be installed via a system package manager. On Ubuntu it can be installed as follows: apt install libsndfile1-dev
@@ -15,7 +15,6 @@
 install_requires = [
     "transformers[sklearn,sentencepiece,audio,vision,sentencepiece]==4.44.0",
     "huggingface_hub[hf_transfer]==0.24.5",
-    "peft==0.12.0",
     # vision
     "Pillow",
     "librosa",
@@ -34,7 +33,9 @@ extras["st"] = ["sentence_transformers==2.7.0"]
 
 extras["diffusers"] = ["diffusers==0.30.0", "accelerate==0.33.0"]
 
-extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio"]
+# Includes `peft` since PEFT requires `torch`: keeping `peft` as a core dependency
+# would mean that `torch` gets installed even when the `torch` extra is not specified.
+extras["torch"] = ["torch==2.2.2", "torchvision", "torchaudio", "peft==0.12.0"]
 extras["test"] = [
     "pytest==7.2.1",
     "pytest-xdist",
diff --git a/src/huggingface_inference_toolkit/sentence_transformers_utils.py b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
index a4df0955..6b55ae76 100644
--- a/src/huggingface_inference_toolkit/sentence_transformers_utils.py
+++ b/src/huggingface_inference_toolkit/sentence_transformers_utils.py
@@ -12,19 +12,21 @@ def is_sentence_transformers_available():
 
 
 class SentenceSimilarityPipeline:
-    def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
-        self.model = SentenceTransformer(model_dir, device=device)
+    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+        self.model = SentenceTransformer(model_dir, device=device, **kwargs)
 
     def __call__(self, inputs=None):
-        embeddings1 = self.model.encode(inputs["source_sentence"], convert_to_tensor=True)
+        embeddings1 = self.model.encode(
+            inputs["source_sentence"], convert_to_tensor=True
+        )
         embeddings2 = self.model.encode(inputs["sentences"], convert_to_tensor=True)
         similarities = util.pytorch_cos_sim(embeddings1, embeddings2).tolist()[0]
         return {"similarities": similarities}
 
 
 class SentenceEmbeddingPipeline:
-    def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
-        self.model = SentenceTransformer(model_dir, device=device)
+    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+        self.model = SentenceTransformer(model_dir, device=device, **kwargs)
 
     def __call__(self, inputs):
         embeddings = self.model.encode(inputs).tolist()
@@ -32,8 +34,8 @@ def __call__(self, inputs):
 
 
 class RankingPipeline:
-    def __init__(self, model_dir: str, device: str = None):  # needs "cuda" for GPU
-        self.model = CrossEncoder(model_dir, device=device)
+    def __init__(self, model_dir: str, device: str = None, **kwargs):  # needs "cuda" for GPU
+        self.model = CrossEncoder(model_dir, device=device, **kwargs)
 
     def __call__(self, inputs):
         scores = self.model.predict(inputs).tolist()
@@ -47,12 +49,16 @@ def __call__(self, inputs):
 }
 
 
-def get_sentence_transformers_pipeline(
-    task=None,
-    model_dir=None,
-    device=-1,
-    **kwargs
-):
+def get_sentence_transformers_pipeline(task=None, model_dir=None, device=-1, **kwargs):
     device = "cuda" if device == 0 else "cpu"
-    pipeline = SENTENCE_TRANSFORMERS_TASKS[task](model_dir=model_dir, device=device, **kwargs)
-    return pipeline
+
kwargs.pop("tokenizer", None) + kwargs.pop("framework", None) + + if task not in SENTENCE_TRANSFORMERS_TASKS: + raise ValueError( + f"Unknown task {task}. Available tasks are: {', '.join(SENTENCE_TRANSFORMERS_TASKS.keys())}" + ) + return SENTENCE_TRANSFORMERS_TASKS[task]( + model_dir=model_dir, device=device, **kwargs + ) diff --git a/src/huggingface_inference_toolkit/vertex_ai_utils.py b/src/huggingface_inference_toolkit/vertex_ai_utils.py index 885c2d07..9a569505 100644 --- a/src/huggingface_inference_toolkit/vertex_ai_utils.py +++ b/src/huggingface_inference_toolkit/vertex_ai_utils.py @@ -8,7 +8,9 @@ # copied from https://github.com/googleapis/python-aiplatform/blob/94d838d8cfe1599bc2d706e66080c05108821986/google/cloud/aiplatform/utils/prediction_utils.py#L121 -def _load_repository_from_gcs(artifact_uri: str, target_dir: Union[str, Path] = "/tmp"): +def _load_repository_from_gcs( + artifact_uri: str, target_dir: Union[str, Path] = "/tmp" +) -> str: """ Load files from GCS path to target_dir """ diff --git a/tests/unit/test_vertex_ai_utils.py b/tests/unit/test_vertex_ai_utils.py index eca64f29..8f31d8b2 100644 --- a/tests/unit/test_vertex_ai_utils.py +++ b/tests/unit/test_vertex_ai_utils.py @@ -1,10 +1,10 @@ from pathlib import Path -from huggingface_inference_toolkit.vertex_ai_utils import _load_repository_from_gcs - def test__load_repository_from_gcs(): - """Tests the `_load_repository_from_gcs` function against a public artifact URI. + """Tests the `_load_repository_from_gcs` function against a public artifact URI. But the + function is overriden since the client needs to be anonymous temporarily, as we're testing + against a publicly accessible artifact. References: - https://cloud.google.com/storage/docs/public-datasets/era5 @@ -14,6 +14,38 @@ def test__load_repository_from_gcs(): public_artifact_uri = ( "gs://gcp-public-data-arco-era5/raw/date-variable-static/2021/12/31/soil_type" ) + + def _load_repository_from_gcs(artifact_uri: str, target_dir: Path) -> str: + """Temporarily override of the `_load_repository_from_gcs` function.""" + import re + + from google.cloud import storage + from huggingface_inference_toolkit.vertex_ai_utils import GCS_URI_PREFIX + + if isinstance(target_dir, str): + target_dir = Path(target_dir) + + if artifact_uri.startswith(GCS_URI_PREFIX): + matches = re.match(f"{GCS_URI_PREFIX}(.*?)/(.*)", artifact_uri) + bucket_name, prefix = matches.groups() # type: ignore + + gcs_client = storage.Client.create_anonymous_client() + blobs = gcs_client.list_blobs(bucket_name, prefix=prefix) + for blob in blobs: + name_without_prefix = blob.name[len(prefix) :] + name_without_prefix = ( + name_without_prefix[1:] + if name_without_prefix.startswith("/") + else name_without_prefix + ) + file_split = name_without_prefix.split("/") + directory = target_dir / Path(*file_split[0:-1]) + directory.mkdir(parents=True, exist_ok=True) + if name_without_prefix and not name_without_prefix.endswith("/"): + blob.download_to_filename(target_dir / name_without_prefix) + + return str(target_dir.absolute()) + target_dir = Path.cwd() / "target" target_dir_path = _load_repository_from_gcs( artifact_uri=public_artifact_uri, target_dir=target_dir